{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "babb934f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f4a35b0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_files(folder_name):\n",
    "    \"\"\"Return the path of every entry found in ``folder_name``.\n",
    "\n",
    "    Each entry name is printed as it is collected. The process working\n",
    "    directory is left untouched, so relative ``folder_name`` values keep\n",
    "    resolving and repeated calls behave the same way.\n",
    "\n",
    "    Args:\n",
    "        folder_name: Directory to list (absolute or relative path).\n",
    "\n",
    "    Returns:\n",
    "        list[str]: ``folder_name/<entry>`` strings in ``os.listdir`` order.\n",
    "        That order is arbitrary, so pick files by name, not by position.\n",
    "    \"\"\"\n",
    "    file_path_list = []\n",
    "    # List the folder directly instead of os.chdir() + os.listdir(): changing\n",
    "    # the cwd is a global side effect that made relative results unresolvable.\n",
    "    for file in os.listdir(folder_name):\n",
    "        print(file)\n",
    "        file_path_list.append(f\"{folder_name}/{file}\")\n",
    "    return file_path_list"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e45f50b2",
   "metadata": {},
   "source": [
    "### 1. NQ Dataset Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "38ed23d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset location. Set the NQ_DOC_FOLDER environment variable to point at\n",
    "# another copy; the original local path is kept as the default.\n",
    "doc_folder = os.path.abspath(os.environ.get(\n",
    "    'NQ_DOC_FOLDER',\n",
    "    '/Users/abhilashamangal/Documents/Semantic Search/data/doc-nq910',\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "87d87331",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "questions.tsv\n",
      "docs.tsv\n"
     ]
    }
   ],
   "source": [
    "# Collect the dataset file paths; get_all_files also prints each entry name.\n",
    "files = get_all_files(doc_folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "64d93250",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the file by name: os.listdir() order is arbitrary, so files[0]\n",
    "# is not guaranteed to be questions.tsv.\n",
    "df_question = pd.read_csv(os.path.join(doc_folder, 'questions.tsv'), sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e140b487",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>relevant</th>\n",
       "      <th>answers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>153</td>\n",
       "      <td>what episode in victorious is give it up</td>\n",
       "      <td>1</td>\n",
       "      <td>Freak the Freak Out</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7043</td>\n",
       "      <td>malcolm in the middle what is their last name</td>\n",
       "      <td>14</td>\n",
       "      <td>Wilkerson</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4392</td>\n",
       "      <td>distance from las vegas to red wood forest</td>\n",
       "      <td>16</td>\n",
       "      <td>15 miles</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8260</td>\n",
       "      <td>what kind of animal is boots from dora</td>\n",
       "      <td>18</td>\n",
       "      <td>anthropomorphic monkey</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6740</td>\n",
       "      <td>where did the rockefeller tree come from 2014</td>\n",
       "      <td>21</td>\n",
       "      <td>Danville , PA</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     id                                           text  relevant  \\\n",
       "0   153       what episode in victorious is give it up         1   \n",
       "1  7043  malcolm in the middle what is their last name        14   \n",
       "2  4392     distance from las vegas to red wood forest        16   \n",
       "3  8260         what kind of animal is boots from dora        18   \n",
       "4  6740  where did the rockefeller tree come from 2014        21   \n",
       "\n",
       "                  answers  \n",
       "0     Freak the Freak Out  \n",
       "1               Wilkerson  \n",
       "2                15 miles  \n",
       "3  anthropomorphic monkey  \n",
       "4           Danville , PA  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five question rows.\n",
    "df_question.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "9c5bc283",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "910"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the questions table.\n",
    "df_question.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0e3dfd5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the file by name: os.listdir() order is arbitrary, so files[1]\n",
    "# is not guaranteed to be docs.tsv.\n",
    "df_docs = pd.read_csv(os.path.join(doc_folder, 'docs.tsv'), sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1c031686",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>List of songs in Victorious - wikipedia &lt;H1&gt; L...</td>\n",
       "      <td>List of songs in Victorious</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Income tax in India - Wikipedia &lt;H1&gt; Income ta...</td>\n",
       "      <td>Income tax in India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Federalism in India - wikipedia &lt;H1&gt; Federalis...</td>\n",
       "      <td>Federalism in India</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Ancient Roman architecture - wikipedia &lt;H1&gt; An...</td>\n",
       "      <td>Ancient Roman architecture</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Control character - wikipedia &lt;H1&gt; Control cha...</td>\n",
       "      <td>Control character</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                               text  \\\n",
       "0   1  List of songs in Victorious - wikipedia <H1> L...   \n",
       "1   2  Income tax in India - Wikipedia <H1> Income ta...   \n",
       "2   3  Federalism in India - wikipedia <H1> Federalis...   \n",
       "3   4  Ancient Roman architecture - wikipedia <H1> An...   \n",
       "4   5  Control character - wikipedia <H1> Control cha...   \n",
       "\n",
       "                         title  \n",
       "0  List of songs in Victorious  \n",
       "1          Income tax in India  \n",
       "2          Federalism in India  \n",
       "3   Ancient Roman architecture  \n",
       "4            Control character  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five document rows.\n",
    "df_docs.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "743f9aca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3367"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the documents table.\n",
    "df_docs.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c9d00e9a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                     Freak the Freak Out\n",
       "1                               Wilkerson\n",
       "2                                15 miles\n",
       "3                  anthropomorphic monkey\n",
       "4                           Danville , PA\n",
       "                      ...                \n",
       "905                                  2010\n",
       "906                     Dead Man 's Chest\n",
       "907                                  1859\n",
       "908                   Washington Capitals\n",
       "909    Tân Sơn Nhất International Airport\n",
       "Name: answers, Length: 910, dtype: object"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the answers column of the questions table.\n",
    "df_question.loc[:, 'answers']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9f9b9009",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count questions whose answer is missing.\n",
    "df_question['answers'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87d0aba0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5d191173",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing i 1\n",
      "Token Length--- 2016\n",
      "Processing i 2\n",
      "Token Length--- 12432\n",
      "Processing i 3\n",
      "Token Length--- 6924\n",
      "Processing i 4\n",
      "Token Length--- 19372\n",
      "Processing i 5\n",
      "Token Length--- 7779\n",
      "Processing i 6\n",
      "Token Length--- 4303\n",
      "Processing i 7\n",
      "Token Length--- 4034\n",
      "Processing i 8\n",
      "Token Length--- 5482\n",
      "Processing i 9\n",
      "Token Length--- 8454\n",
      "Processing i 10\n",
      "Token Length--- 9085\n",
      "Processing i 11\n",
      "Token Length--- 26497\n",
      "Processing i 12\n",
      "Token Length--- 3932\n",
      "Processing i 13\n",
      "Token Length--- 17798\n",
      "Processing i 14\n",
      "Token Length--- 11338\n",
      "Processing i 15\n",
      "Token Length--- 2005\n",
      "Processing i 16\n",
      "Token Length--- 7178\n",
      "Processing i 17\n",
      "Token Length--- 18241\n",
      "Processing i 18\n",
      "Token Length--- 8286\n",
      "Processing i 19\n",
      "Token Length--- 8823\n",
      "Processing i 20\n",
      "Token Length--- 6744\n",
      "Processing i 21\n",
      "Token Length--- 5660\n",
      "Processing i 22\n",
      "Token Length--- 5292\n",
      "Processing i 23\n",
      "Token Length--- 3981\n",
      "Processing i 24\n",
      "Token Length--- 6100\n",
      "Processing i 25\n",
      "Token Length--- 17179\n",
      "Processing i 26\n",
      "Token Length--- 5711\n",
      "Processing i 27\n",
      "Token Length--- 3849\n",
      "Processing i 28\n",
      "Token Length--- 29860\n",
      "Processing i 29\n",
      "Token Length--- 32287\n",
      "Processing i 30\n",
      "Token Length--- 6385\n",
      "Processing i 31\n",
      "Token Length--- 4377\n",
      "Processing i 32\n",
      "Token Length--- 3573\n",
      "Processing i 33\n",
      "Token Length--- 5506\n",
      "Processing i 34\n",
      "Token Length--- 9218\n",
      "Processing i 35\n",
      "Token Length--- 1411\n",
      "Processing i 36\n",
      "Token Length--- 1304\n",
      "Processing i 37\n",
      "Token Length--- 4448\n",
      "Processing i 38\n",
      "Token Length--- 10064\n",
      "Processing i 39\n",
      "Token Length--- 12523\n",
      "Processing i 40\n",
      "Token Length--- 2058\n",
      "Processing i 41\n",
      "Token Length--- 4798\n",
      "Processing i 42\n",
      "Token Length--- 25522\n",
      "Processing i 43\n",
      "Token Length--- 25518\n",
      "Processing i 44\n",
      "Token Length--- 1675\n",
      "Processing i 45\n",
      "Token Length--- 3258\n",
      "Processing i 46\n",
      "Token Length--- 6850\n",
      "Processing i 47\n",
      "Token Length--- 3456\n",
      "Processing i 48\n",
      "Token Length--- 12074\n",
      "Processing i 49\n",
      "Token Length--- 18498\n",
      "Processing i 50\n",
      "Token Length--- 3686\n",
      "Processing i 51\n",
      "Token Length--- 4803\n",
      "Processing i 52\n",
      "Token Length--- 6317\n",
      "Processing i 53\n",
      "Token Length--- 1372\n",
      "Processing i 54\n",
      "Token Length--- 24297\n",
      "Processing i 55\n",
      "Token Length--- 1316\n",
      "Processing i 56\n",
      "Token Length--- 3609\n",
      "Processing i 57\n",
      "Token Length--- 10689\n",
      "Processing i 58\n",
      "Token Length--- 1016\n",
      "Processing i 59\n",
      "Token Length--- 4284\n",
      "Processing i 60\n",
      "Token Length--- 9721\n",
      "Processing i 61\n",
      "Token Length--- 11489\n",
      "Processing i 62\n",
      "Token Length--- 20590\n",
      "Processing i 63\n",
      "Token Length--- 10763\n",
      "Processing i 64\n",
      "Token Length--- 14234\n",
      "Processing i 65\n",
      "Token Length--- 16831\n",
      "Processing i 66\n",
      "Token Length--- 13426\n",
      "Processing i 67\n",
      "Token Length--- 19147\n",
      "Processing i 68\n",
      "Token Length--- 4746\n",
      "Processing i 69\n",
      "Token Length--- 1333\n",
      "Processing i 70\n",
      "Token Length--- 23102\n",
      "Processing i 71\n",
      "Token Length--- 5187\n",
      "Processing i 72\n",
      "Token Length--- 20994\n",
      "Processing i 73\n",
      "Token Length--- 4338\n",
      "Processing i 74\n",
      "Token Length--- 7435\n",
      "Processing i 75\n",
      "Token Length--- 12177\n",
      "Processing i 76\n",
      "Token Length--- 2580\n",
      "Processing i 77\n",
      "Token Length--- 8027\n",
      "Processing i 78\n",
      "Token Length--- 4106\n",
      "Processing i 79\n",
      "Token Length--- 561\n",
      "Processing i 80\n",
      "Token Length--- 5906\n",
      "Processing i 81\n",
      "Token Length--- 3976\n",
      "Processing i 82\n",
      "Token Length--- 19575\n",
      "Processing i 83\n",
      "Token Length--- 772\n",
      "Processing i 84\n",
      "Token Length--- 8468\n",
      "Processing i 85\n",
      "Token Length--- 61891\n",
      "Processing i 86\n",
      "Token Length--- 3519\n",
      "Processing i 87\n",
      "Token Length--- 7141\n",
      "Processing i 88\n",
      "Token Length--- 2379\n",
      "Processing i 89\n",
      "Token Length--- 29449\n",
      "Processing i 90\n",
      "Token Length--- 1148\n",
      "Processing i 91\n",
      "Token Length--- 2345\n",
      "Processing i 92\n",
      "Token Length--- 5083\n",
      "Processing i 93\n",
      "Token Length--- 12617\n",
      "Processing i 94\n",
      "Token Length--- 5623\n",
      "Processing i 95\n",
      "Token Length--- 8155\n",
      "Processing i 96\n",
      "Token Length--- 10888\n",
      "Processing i 97\n",
      "Token Length--- 967\n",
      "Processing i 98\n",
      "Token Length--- 8792\n",
      "Processing i 99\n",
      "Token Length--- 3930\n",
      "Processing i 100\n",
      "Token Length--- 7237\n",
      "Processing i 101\n",
      "Token Length--- 5677\n",
      "Processing i 102\n",
      "Token Length--- 7017\n",
      "Processing i 103\n",
      "Token Length--- 2652\n",
      "Processing i 104\n",
      "Token Length--- 3910\n",
      "Processing i 105\n",
      "Token Length--- 27252\n",
      "Processing i 106\n",
      "Token Length--- 9610\n",
      "Processing i 107\n",
      "Token Length--- 13031\n",
      "Processing i 108\n",
      "Token Length--- 3182\n",
      "Processing i 109\n",
      "Token Length--- 2266\n",
      "Processing i 110\n",
      "Token Length--- 7999\n",
      "Processing i 111\n",
      "Token Length--- 35204\n",
      "Processing i 112\n",
      "Token Length--- 2152\n",
      "Processing i 113\n",
      "Token Length--- 3855\n",
      "Processing i 114\n",
      "Token Length--- 4032\n",
      "Processing i 115\n",
      "Token Length--- 2421\n",
      "Processing i 116\n",
      "Token Length--- 2121\n",
      "Processing i 117\n",
      "Token Length--- 4145\n",
      "Processing i 118\n",
      "Token Length--- 7631\n",
      "Processing i 119\n",
      "Token Length--- 31412\n",
      "Processing i 120\n",
      "Token Length--- 3737\n",
      "Processing i 121\n",
      "Token Length--- 4491\n",
      "Processing i 122\n",
      "Token Length--- 30564\n",
      "Processing i 123\n",
      "Token Length--- 1285\n",
      "Processing i 124\n",
      "Token Length--- 2083\n",
      "Processing i 125\n",
      "Token Length--- 32612\n",
      "Processing i 126\n",
      "Token Length--- 2120\n",
      "Processing i 127\n",
      "Token Length--- 8989\n",
      "Processing i 128\n",
      "Token Length--- 3216\n",
      "Processing i 129\n",
      "Token Length--- 10989\n",
      "Processing i 130\n",
      "Token Length--- 5302\n",
      "Processing i 131\n",
      "Token Length--- 27514\n",
      "Processing i 132\n",
      "Token Length--- 3103\n",
      "Processing i 133\n",
      "Token Length--- 11215\n",
      "Processing i 134\n",
      "Token Length--- 9320\n",
      "Processing i 135\n",
      "Token Length--- 18352\n",
      "Processing i 136\n",
      "Token Length--- 1877\n",
      "Processing i 137\n",
      "Token Length--- 3879\n",
      "Processing i 138\n",
      "Token Length--- 7955\n",
      "Processing i 139\n",
      "Token Length--- 6345\n",
      "Processing i 140\n",
      "Token Length--- 61891\n",
      "Processing i 141\n",
      "Token Length--- 30480\n",
      "Processing i 142\n",
      "Token Length--- 14290\n",
      "Processing i 143\n",
      "Token Length--- 5957\n",
      "Processing i 144\n",
      "Token Length--- 2154\n",
      "Processing i 145\n",
      "Token Length--- 9044\n",
      "Processing i 146\n",
      "Token Length--- 15571\n",
      "Processing i 147\n",
      "Token Length--- 6465\n",
      "Processing i 148\n",
      "Token Length--- 3916\n",
      "Processing i 149\n",
      "Token Length--- 19063\n",
      "Processing i 150\n",
      "Token Length--- 10133\n",
      "Processing i 151\n",
      "Token Length--- 10955\n",
      "Processing i 152\n",
      "Token Length--- 1256\n",
      "Processing i 153\n",
      "Token Length--- 5843\n",
      "Processing i 154\n",
      "Token Length--- 6454\n",
      "Processing i 155\n",
      "Token Length--- 3019\n",
      "Processing i 156\n",
      "Token Length--- 8976\n",
      "Processing i 157\n",
      "Token Length--- 2855\n",
      "Processing i 158\n",
      "Token Length--- 21717\n",
      "Processing i 159\n",
      "Token Length--- 5732\n",
      "Processing i 160\n",
      "Token Length--- 8290\n",
      "Processing i 161\n",
      "Token Length--- 5797\n",
      "Processing i 162\n",
      "Token Length--- 6063\n",
      "Processing i 163\n",
      "Token Length--- 7945\n",
      "Processing i 164\n",
      "Token Length--- 421\n",
      "Processing i 165\n",
      "Token Length--- 20082\n",
      "Processing i 166\n",
      "Token Length--- 4048\n",
      "Processing i 167\n",
      "Token Length--- 2056\n",
      "Processing i 168\n",
      "Token Length--- 877\n",
      "Processing i 169\n",
      "Token Length--- 3616\n",
      "Processing i 170\n",
      "Token Length--- 2862\n",
      "Processing i 171\n",
      "Token Length--- 18387\n",
      "Processing i 172\n",
      "Token Length--- 14987\n",
      "Processing i 173\n",
      "Token Length--- 6152\n",
      "Processing i 174\n",
      "Token Length--- 25433\n",
      "Processing i 175\n",
      "Token Length--- 1264\n",
      "Processing i 176\n",
      "Token Length--- 2173\n",
      "Processing i 177\n",
      "Token Length--- 7297\n",
      "Processing i 178\n",
      "Token Length--- 29515\n",
      "Processing i 179\n",
      "Token Length--- 3254\n",
      "Processing i 180\n",
      "Token Length--- 2096\n",
      "Processing i 181\n",
      "Token Length--- 790\n",
      "Processing i 182\n",
      "Token Length--- 8216\n",
      "Processing i 183\n",
      "Token Length--- 13133\n",
      "Processing i 184\n",
      "Token Length--- 11899\n",
      "Processing i 185\n",
      "Token Length--- 4867\n",
      "Processing i 186\n",
      "Token Length--- 4389\n",
      "Processing i 187\n",
      "Token Length--- 23009\n",
      "Processing i 188\n",
      "Token Length--- 4487\n",
      "Processing i 189\n",
      "Token Length--- 13909\n",
      "Processing i 190\n",
      "Token Length--- 6567\n",
      "Processing i 191\n",
      "Token Length--- 10067\n",
      "Processing i 192\n",
      "Token Length--- 10074\n",
      "Processing i 193\n",
      "Token Length--- 28490\n",
      "Processing i 194\n",
      "Token Length--- 4434\n",
      "Processing i 195\n",
      "Token Length--- 17821\n",
      "Processing i 196\n",
      "Token Length--- 1928\n",
      "Processing i 197\n",
      "Token Length--- 4041\n",
      "Processing i 198\n",
      "Token Length--- 15305\n",
      "Processing i 199\n",
      "Token Length--- 1870\n",
      "Processing i 200\n",
      "Token Length--- 5695\n",
      "Processing i 201\n",
      "Token Length--- 7447\n",
      "Processing i 202\n",
      "Token Length--- 26994\n",
      "Processing i 203\n",
      "Token Length--- 2540\n",
      "Processing i 204\n",
      "Token Length--- 5979\n",
      "Processing i 205\n",
      "Token Length--- 1157\n",
      "Processing i 206\n",
      "Token Length--- 14555\n",
      "Processing i 207\n",
      "Token Length--- 3997\n",
      "Processing i 208\n",
      "Token Length--- 2815\n",
      "Processing i 209\n",
      "Token Length--- 18631\n",
      "Processing i 210\n",
      "Token Length--- 13663\n",
      "Processing i 211\n",
      "Token Length--- 3084\n",
      "Processing i 212\n",
      "Token Length--- 5694\n",
      "Processing i 213\n",
      "Token Length--- 7172\n",
      "Processing i 214\n",
      "Token Length--- 15046\n",
      "Processing i 215\n",
      "Token Length--- 5858\n",
      "Processing i 216\n",
      "Token Length--- 2040\n",
      "Processing i 217\n",
      "Token Length--- 6428\n",
      "Processing i 218\n",
      "Token Length--- 8653\n",
      "Processing i 219\n",
      "Token Length--- 11167\n",
      "Processing i 220\n",
      "Token Length--- 15392\n",
      "Processing i 221\n",
      "Token Length--- 14475\n",
      "Processing i 222\n",
      "Token Length--- 13741\n",
      "Processing i 223\n",
      "Token Length--- 1811\n",
      "Processing i 224\n",
      "Token Length--- 37804\n",
      "Processing i 225\n",
      "Token Length--- 8369\n",
      "Processing i 226\n",
      "Token Length--- 38739\n",
      "Processing i 227\n",
      "Token Length--- 3266\n",
      "Processing i 228\n",
      "Token Length--- 961\n",
      "Processing i 229\n",
      "Token Length--- 15211\n",
      "Processing i 230\n",
      "Token Length--- 1892\n",
      "Processing i 231\n",
      "Token Length--- 6087\n",
      "Processing i 232\n",
      "Token Length--- 2092\n",
      "Processing i 233\n",
      "Token Length--- 1500\n",
      "Processing i 234\n",
      "Token Length--- 2121\n",
      "Processing i 235\n",
      "Token Length--- 25849\n",
      "Processing i 236\n",
      "Token Length--- 8872\n",
      "Processing i 237\n",
      "Token Length--- 2123\n",
      "Processing i 238\n",
      "Token Length--- 5630\n",
      "Processing i 239\n",
      "Token Length--- 5204\n",
      "Processing i 240\n",
      "Token Length--- 8488\n",
      "Processing i 241\n",
      "Token Length--- 28747\n",
      "Processing i 242\n",
      "Token Length--- 9307\n",
      "Processing i 243\n",
      "Token Length--- 20108\n",
      "Processing i 244\n",
      "Token Length--- 1440\n",
      "Processing i 245\n",
      "Token Length--- 13969\n",
      "Processing i 246\n",
      "Token Length--- 2812\n",
      "Processing i 247\n",
      "Token Length--- 6679\n",
      "Processing i 248\n",
      "Token Length--- 13552\n",
      "Processing i 249\n",
      "Token Length--- 30916\n",
      "Processing i 250\n",
      "Token Length--- 9017\n",
      "Processing i 251\n",
      "Token Length--- 1409\n",
      "Processing i 252\n",
      "Token Length--- 8758\n",
      "Processing i 253\n",
      "Token Length--- 5783\n",
      "Processing i 254\n",
      "Token Length--- 13795\n",
      "Processing i 255\n",
      "Token Length--- 7504\n",
      "Processing i 256\n",
      "Token Length--- 16415\n",
      "Processing i 257\n",
      "Token Length--- 3207\n",
      "Processing i 258\n",
      "Token Length--- 5674\n",
      "Processing i 259\n",
      "Token Length--- 21827\n",
      "Processing i 260\n",
      "Token Length--- 31742\n",
      "Processing i 261\n",
      "Token Length--- 26134\n",
      "Processing i 262\n",
      "Token Length--- 11535\n",
      "Processing i 263\n",
      "Token Length--- 1410\n",
      "Processing i 264\n",
      "Token Length--- 9205\n",
      "Processing i 265\n",
      "Token Length--- 2440\n",
      "Processing i 266\n",
      "Token Length--- 19744\n",
      "Processing i 267\n",
      "Token Length--- 9443\n",
      "Processing i 268\n",
      "Token Length--- 6968\n",
      "Processing i 269\n",
      "Token Length--- 10610\n",
      "Processing i 270\n",
      "Token Length--- 6892\n",
      "Processing i 271\n",
      "Token Length--- 3321\n",
      "Processing i 272\n",
      "Token Length--- 3606\n",
      "Processing i 273\n",
      "Token Length--- 15699\n",
      "Processing i 274\n",
      "Token Length--- 7893\n",
      "Processing i 275\n",
      "Token Length--- 34718\n",
      "Processing i 276\n",
      "Token Length--- 21124\n",
      "Processing i 277\n",
      "Token Length--- 745\n",
      "Processing i 278\n",
      "Token Length--- 1873\n",
      "Processing i 279\n",
      "Token Length--- 13868\n",
      "Processing i 280\n",
      "Token Length--- 3792\n",
      "Processing i 281\n",
      "Token Length--- 13524\n",
      "Processing i 282\n",
      "Token Length--- 10313\n",
      "Processing i 283\n",
      "Token Length--- 5939\n",
      "Processing i 284\n",
      "Token Length--- 6866\n",
      "Processing i 285\n",
      "Token Length--- 6689\n",
      "Processing i 286\n",
      "Token Length--- 1538\n",
      "Processing i 287\n",
      "Token Length--- 14948\n",
      "Processing i 288\n",
      "Token Length--- 3250\n",
      "Processing i 289\n",
      "Token Length--- 6725\n",
      "Processing i 290\n",
      "Token Length--- 2120\n",
      "Processing i 291\n",
      "Token Length--- 7557\n",
      "Processing i 292\n",
      "Token Length--- 20235\n",
      "Processing i 293\n",
      "Token Length--- 2184\n",
      "Processing i 294\n",
      "Token Length--- 7918\n",
      "Processing i 295\n",
      "Token Length--- 4209\n",
      "Processing i 296\n",
      "Token Length--- 5740\n",
      "Processing i 297\n",
      "Token Length--- 15182\n",
      "Processing i 298\n",
      "Token Length--- 20948\n",
      "Processing i 299\n",
      "Token Length--- 6560\n",
      "Processing i 300\n",
      "Token Length--- 3232\n",
      "Processing i 301\n",
      "Token Length--- 12268\n",
      "Processing i 302\n",
      "Token Length--- 4326\n",
      "Processing i 303\n",
      "Token Length--- 6301\n",
      "Processing i 304\n",
      "Token Length--- 8853\n",
      "Processing i 305\n",
      "Token Length--- 7593\n",
      "Processing i 306\n",
      "Token Length--- 2521\n",
      "Processing i 307\n",
      "Token Length--- 3703\n",
      "Processing i 308\n",
      "Token Length--- 8045\n",
      "Processing i 309\n",
      "Token Length--- 13774\n",
      "Processing i 310\n",
      "Token Length--- 5979\n",
      "Processing i 311\n",
      "Token Length--- 9075\n",
      "Processing i 312\n",
      "Token Length--- 6691\n",
      "Processing i 313\n",
      "Token Length--- 5749\n",
      "Processing i 314\n",
      "Token Length--- 2589\n",
      "Processing i 315\n",
      "Token Length--- 3863\n",
      "Processing i 316\n",
      "Token Length--- 1348\n",
      "Processing i 317\n",
      "Token Length--- 27996\n",
      "Processing i 318\n",
      "Token Length--- 2827\n",
      "Processing i 319\n",
      "Token Length--- 7994\n",
      "Processing i 320\n",
      "Token Length--- 3675\n",
      "Processing i 321\n",
      "Token Length--- 30013\n",
      "Processing i 322\n",
      "Token Length--- 4693\n",
      "Processing i 323\n",
      "Token Length--- 8871\n",
      "Processing i 324\n",
      "Token Length--- 36542\n",
      "Processing i 325\n",
      "Token Length--- 12937\n",
      "Processing i 326\n",
      "Token Length--- 17178\n",
      "Processing i 327\n",
      "Token Length--- 3498\n",
      "Processing i 328\n",
      "Token Length--- 7353\n",
      "Processing i 329\n",
      "Token Length--- 6980\n",
      "Processing i 330\n",
      "Token Length--- 6190\n",
      "Processing i 331\n",
      "Token Length--- 10543\n",
      "Processing i 332\n",
      "Token Length--- 4020\n",
      "Processing i 333\n",
      "Token Length--- 31440\n",
      "Processing i 334\n",
      "Token Length--- 1859\n",
      "Processing i 335\n",
      "Token Length--- 2349\n",
      "Processing i 336\n",
      "Token Length--- 19856\n",
      "Processing i 337\n",
      "Token Length--- 6776\n",
      "Processing i 338\n",
      "Token Length--- 9517\n",
      "Processing i 339\n",
      "Token Length--- 5062\n",
      "Processing i 340\n",
      "Token Length--- 2235\n",
      "Processing i 341\n",
      "Token Length--- 19363\n",
      "Processing i 342\n",
      "Token Length--- 1273\n",
      "Processing i 343\n",
      "Token Length--- 6223\n",
      "Processing i 344\n",
      "Token Length--- 4266\n",
      "Processing i 345\n",
      "Token Length--- 9229\n",
      "Processing i 346\n",
      "Token Length--- 15923\n",
      "Processing i 347\n",
      "Token Length--- 8224\n",
      "Processing i 348\n",
      "Token Length--- 5983\n",
      "Processing i 349\n",
      "Token Length--- 3173\n",
      "Processing i 350\n",
      "Token Length--- 16059\n",
      "Processing i 351\n",
      "Token Length--- 6176\n",
      "Processing i 352\n",
      "Token Length--- 4072\n",
      "Processing i 353\n",
      "Token Length--- 11140\n",
      "Processing i 354\n",
      "Token Length--- 6936\n",
      "Processing i 355\n",
      "Token Length--- 4180\n",
      "Processing i 356\n",
      "Token Length--- 1300\n",
      "Processing i 357\n",
      "Token Length--- 5670\n",
      "Processing i 358\n",
      "Token Length--- 14215\n",
      "Processing i 359\n",
      "Token Length--- 21767\n",
      "Processing i 360\n",
      "Token Length--- 2522\n",
      "Processing i 361\n",
      "Token Length--- 12803\n",
      "Processing i 362\n",
      "Token Length--- 3035\n",
      "Processing i 363\n",
      "Token Length--- 4202\n",
      "Processing i 364\n",
      "Token Length--- 2234\n",
      "Processing i 365\n",
      "Token Length--- 16680\n",
      "Processing i 366\n",
      "Token Length--- 10906\n",
      "Processing i 367\n",
      "Token Length--- 14463\n",
      "Processing i 368\n",
      "Token Length--- 10619\n",
      "Processing i 369\n",
      "Token Length--- 1695\n",
      "Processing i 370\n",
      "Token Length--- 2139\n",
      "Processing i 371\n",
      "Token Length--- 4836\n",
      "Processing i 372\n",
      "Token Length--- 22414\n",
      "Processing i 373\n",
      "Token Length--- 2917\n",
      "Processing i 374\n",
      "Token Length--- 6095\n",
      "Processing i 375\n",
      "Token Length--- 13127\n",
      "Processing i 376\n",
      "Token Length--- 12614\n",
      "Processing i 377\n",
      "Token Length--- 10217\n",
      "Processing i 378\n",
      "Token Length--- 6083\n",
      "Processing i 379\n",
      "Token Length--- 4460\n",
      "Processing i 380\n",
      "Token Length--- 38296\n",
      "Processing i 381\n",
      "Token Length--- 3286\n",
      "Processing i 382\n",
      "Token Length--- 24565\n",
      "Processing i 383\n",
      "Token Length--- 13689\n",
      "Processing i 384\n",
      "Token Length--- 15878\n",
      "Processing i 385\n",
      "Token Length--- 30623\n",
      "Processing i 386\n",
      "Token Length--- 15534\n",
      "Processing i 387\n",
      "Token Length--- 22081\n",
      "Processing i 388\n",
      "Token Length--- 6959\n",
      "Processing i 389\n",
      "Token Length--- 15040\n",
      "Processing i 390\n",
      "Token Length--- 3269\n",
      "Processing i 391\n",
      "Token Length--- 2935\n",
      "Processing i 392\n",
      "Token Length--- 2151\n",
      "Processing i 393\n",
      "Token Length--- 1470\n",
      "Processing i 394\n",
      "Token Length--- 6848\n",
      "Processing i 395\n",
      "Token Length--- 4498\n",
      "Processing i 396\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 9448\n",
      "Processing i 397\n",
      "Token Length--- 6331\n",
      "Processing i 398\n",
      "Token Length--- 11472\n",
      "Processing i 399\n",
      "Token Length--- 3754\n",
      "Processing i 400\n",
      "Token Length--- 10748\n",
      "Processing i 401\n",
      "Token Length--- 2809\n",
      "Processing i 402\n",
      "Token Length--- 9806\n",
      "Processing i 403\n",
      "Token Length--- 3882\n",
      "Processing i 404\n",
      "Token Length--- 21994\n",
      "Processing i 405\n",
      "Token Length--- 5611\n",
      "Processing i 406\n",
      "Token Length--- 34428\n",
      "Processing i 407\n",
      "Token Length--- 6208\n",
      "Processing i 408\n",
      "Token Length--- 3678\n",
      "Processing i 409\n",
      "Token Length--- 8486\n",
      "Processing i 410\n",
      "Token Length--- 4866\n",
      "Processing i 411\n",
      "Token Length--- 9873\n",
      "Processing i 412\n",
      "Token Length--- 17649\n",
      "Processing i 413\n",
      "Token Length--- 14249\n",
      "Processing i 414\n",
      "Token Length--- 3247\n",
      "Processing i 415\n",
      "Token Length--- 4013\n",
      "Processing i 416\n",
      "Token Length--- 1494\n",
      "Processing i 417\n",
      "Token Length--- 20339\n",
      "Processing i 418\n",
      "Token Length--- 2422\n",
      "Processing i 419\n",
      "Token Length--- 2223\n",
      "Processing i 420\n",
      "Token Length--- 6468\n",
      "Processing i 421\n",
      "Token Length--- 30527\n",
      "Processing i 422\n",
      "Token Length--- 3318\n",
      "Processing i 423\n",
      "Token Length--- 5278\n",
      "Processing i 424\n",
      "Token Length--- 6424\n",
      "Processing i 425\n",
      "Token Length--- 10997\n",
      "Processing i 426\n",
      "Token Length--- 11887\n",
      "Processing i 427\n",
      "Token Length--- 12672\n",
      "Processing i 428\n",
      "Token Length--- 9058\n",
      "Processing i 429\n",
      "Token Length--- 9188\n",
      "Processing i 430\n",
      "Token Length--- 5507\n",
      "Processing i 431\n",
      "Token Length--- 1956\n",
      "Processing i 432\n",
      "Token Length--- 19261\n",
      "Processing i 433\n",
      "Token Length--- 23470\n",
      "Processing i 434\n",
      "Token Length--- 15154\n",
      "Processing i 435\n",
      "Token Length--- 579\n",
      "Processing i 436\n",
      "Token Length--- 15867\n",
      "Processing i 437\n",
      "Token Length--- 3077\n",
      "Processing i 438\n",
      "Token Length--- 4355\n",
      "Processing i 439\n",
      "Token Length--- 3058\n",
      "Processing i 440\n",
      "Token Length--- 9267\n",
      "Processing i 441\n",
      "Token Length--- 23181\n",
      "Processing i 442\n",
      "Token Length--- 6699\n",
      "Processing i 443\n",
      "Token Length--- 8226\n",
      "Processing i 444\n",
      "Token Length--- 17325\n",
      "Processing i 445\n",
      "Token Length--- 2105\n",
      "Processing i 446\n",
      "Token Length--- 26512\n",
      "Processing i 447\n",
      "Token Length--- 9361\n",
      "Processing i 448\n",
      "Token Length--- 6726\n",
      "Processing i 449\n",
      "Token Length--- 6574\n",
      "Processing i 450\n",
      "Token Length--- 1638\n",
      "Processing i 451\n",
      "Token Length--- 6193\n",
      "Processing i 452\n",
      "Token Length--- 1561\n",
      "Processing i 453\n",
      "Token Length--- 1547\n",
      "Processing i 454\n",
      "Token Length--- 1471\n",
      "Processing i 455\n",
      "Token Length--- 13343\n",
      "Processing i 456\n",
      "Token Length--- 25105\n",
      "Processing i 457\n",
      "Token Length--- 2307\n",
      "Processing i 458\n",
      "Token Length--- 4808\n",
      "Processing i 459\n",
      "Token Length--- 1193\n",
      "Processing i 460\n",
      "Token Length--- 1049\n",
      "Processing i 461\n",
      "Token Length--- 512\n",
      "Processing i 462\n",
      "Token Length--- 18666\n",
      "Processing i 463\n",
      "Token Length--- 7489\n",
      "Processing i 464\n",
      "Token Length--- 5345\n",
      "Processing i 465\n",
      "Token Length--- 13435\n",
      "Processing i 466\n",
      "Token Length--- 3112\n",
      "Processing i 467\n",
      "Token Length--- 6618\n",
      "Processing i 468\n",
      "Token Length--- 6865\n",
      "Processing i 469\n",
      "Token Length--- 40071\n",
      "Processing i 470\n",
      "Token Length--- 2087\n",
      "Processing i 471\n",
      "Token Length--- 1022\n",
      "Processing i 472\n",
      "Token Length--- 2367\n",
      "Processing i 473\n",
      "Token Length--- 1819\n",
      "Processing i 474\n",
      "Token Length--- 922\n",
      "Processing i 475\n",
      "Token Length--- 3818\n",
      "Processing i 476\n",
      "Token Length--- 20473\n",
      "Processing i 477\n",
      "Token Length--- 25111\n",
      "Processing i 478\n",
      "Token Length--- 20005\n",
      "Processing i 479\n",
      "Token Length--- 5085\n",
      "Processing i 480\n",
      "Token Length--- 5441\n",
      "Processing i 481\n",
      "Token Length--- 16002\n",
      "Processing i 482\n",
      "Token Length--- 16667\n",
      "Processing i 483\n",
      "Token Length--- 21580\n",
      "Processing i 484\n",
      "Token Length--- 12044\n",
      "Processing i 485\n",
      "Token Length--- 7594\n",
      "Processing i 486\n",
      "Token Length--- 4203\n",
      "Processing i 487\n",
      "Token Length--- 10108\n",
      "Processing i 488\n",
      "Token Length--- 8007\n",
      "Processing i 489\n",
      "Token Length--- 15621\n",
      "Processing i 490\n",
      "Token Length--- 10113\n",
      "Processing i 491\n",
      "Token Length--- 2391\n",
      "Processing i 492\n",
      "Token Length--- 7271\n",
      "Processing i 493\n",
      "Token Length--- 7381\n",
      "Processing i 494\n",
      "Token Length--- 26330\n",
      "Processing i 495\n",
      "Token Length--- 1166\n",
      "Processing i 496\n",
      "Token Length--- 22685\n",
      "Processing i 497\n",
      "Token Length--- 10779\n",
      "Processing i 498\n",
      "Token Length--- 3775\n",
      "Processing i 499\n",
      "Token Length--- 8568\n",
      "Processing i 500\n",
      "Token Length--- 7142\n",
      "Processing i 501\n",
      "Token Length--- 4786\n",
      "Processing i 502\n",
      "Token Length--- 1923\n",
      "Processing i 503\n",
      "Token Length--- 1818\n",
      "Processing i 504\n",
      "Token Length--- 2200\n",
      "Processing i 505\n",
      "Token Length--- 1998\n",
      "Processing i 506\n",
      "Token Length--- 4142\n",
      "Processing i 507\n",
      "Token Length--- 2581\n",
      "Processing i 508\n",
      "Token Length--- 34054\n",
      "Processing i 509\n",
      "Token Length--- 13846\n",
      "Processing i 510\n",
      "Token Length--- 7285\n",
      "Processing i 511\n",
      "Token Length--- 1986\n",
      "Processing i 512\n",
      "Token Length--- 13509\n",
      "Processing i 513\n",
      "Token Length--- 2687\n",
      "Processing i 514\n",
      "Token Length--- 5343\n",
      "Processing i 515\n",
      "Token Length--- 6503\n",
      "Processing i 516\n",
      "Token Length--- 10847\n",
      "Processing i 517\n",
      "Token Length--- 8041\n",
      "Processing i 518\n",
      "Token Length--- 2464\n",
      "Processing i 519\n",
      "Token Length--- 2966\n",
      "Processing i 520\n",
      "Token Length--- 2577\n",
      "Processing i 521\n",
      "Token Length--- 54968\n",
      "Processing i 522\n",
      "Token Length--- 5471\n",
      "Processing i 523\n",
      "Token Length--- 11708\n",
      "Processing i 524\n",
      "Token Length--- 14083\n",
      "Processing i 525\n",
      "Token Length--- 4081\n",
      "Processing i 526\n",
      "Token Length--- 3370\n",
      "Processing i 527\n",
      "Token Length--- 11873\n",
      "Processing i 528\n",
      "Token Length--- 10624\n",
      "Processing i 529\n",
      "Token Length--- 33934\n",
      "Processing i 530\n",
      "Token Length--- 5947\n",
      "Processing i 531\n",
      "Token Length--- 3072\n",
      "Processing i 532\n",
      "Token Length--- 6706\n",
      "Processing i 533\n",
      "Token Length--- 4558\n",
      "Processing i 534\n",
      "Token Length--- 7337\n",
      "Processing i 535\n",
      "Token Length--- 4091\n",
      "Processing i 536\n",
      "Token Length--- 40642\n",
      "Processing i 537\n",
      "Token Length--- 3711\n",
      "Processing i 538\n",
      "Token Length--- 2650\n",
      "Processing i 539\n",
      "Token Length--- 4891\n",
      "Processing i 540\n",
      "Token Length--- 712\n",
      "Processing i 541\n",
      "Token Length--- 8105\n",
      "Processing i 542\n",
      "Token Length--- 1942\n",
      "Processing i 543\n",
      "Token Length--- 5180\n",
      "Processing i 544\n",
      "Token Length--- 12249\n",
      "Processing i 545\n",
      "Token Length--- 7790\n",
      "Processing i 546\n",
      "Token Length--- 3727\n",
      "Processing i 547\n",
      "Token Length--- 5835\n",
      "Processing i 548\n",
      "Token Length--- 9024\n",
      "Processing i 549\n",
      "Token Length--- 5886\n",
      "Processing i 550\n",
      "Token Length--- 5751\n",
      "Processing i 551\n",
      "Token Length--- 7350\n",
      "Processing i 552\n",
      "Token Length--- 6006\n",
      "Processing i 553\n",
      "Token Length--- 21648\n",
      "Processing i 554\n",
      "Token Length--- 2001\n",
      "Processing i 555\n",
      "Token Length--- 1275\n",
      "Processing i 556\n",
      "Token Length--- 2739\n",
      "Processing i 557\n",
      "Token Length--- 2707\n",
      "Processing i 558\n",
      "Token Length--- 18184\n",
      "Processing i 559\n",
      "Token Length--- 1122\n",
      "Processing i 560\n",
      "Token Length--- 17578\n",
      "Processing i 561\n",
      "Token Length--- 1882\n",
      "Processing i 562\n",
      "Token Length--- 2888\n",
      "Processing i 563\n",
      "Token Length--- 3175\n",
      "Processing i 564\n",
      "Token Length--- 8053\n",
      "Processing i 565\n",
      "Token Length--- 5233\n",
      "Processing i 566\n",
      "Token Length--- 46367\n",
      "Processing i 567\n",
      "Token Length--- 2461\n",
      "Processing i 568\n",
      "Token Length--- 3196\n",
      "Processing i 569\n",
      "Token Length--- 2777\n",
      "Processing i 570\n",
      "Token Length--- 1140\n",
      "Processing i 571\n",
      "Token Length--- 4289\n",
      "Processing i 572\n",
      "Token Length--- 11534\n",
      "Processing i 573\n",
      "Token Length--- 6006\n",
      "Processing i 574\n",
      "Token Length--- 29930\n",
      "Processing i 575\n",
      "Token Length--- 17833\n",
      "Processing i 576\n",
      "Token Length--- 3427\n",
      "Processing i 577\n",
      "Token Length--- 1688\n",
      "Processing i 578\n",
      "Token Length--- 16601\n",
      "Processing i 579\n",
      "Token Length--- 5306\n",
      "Processing i 580\n",
      "Token Length--- 12075\n",
      "Processing i 581\n",
      "Token Length--- 5970\n",
      "Processing i 582\n",
      "Token Length--- 4865\n",
      "Processing i 583\n",
      "Token Length--- 1467\n",
      "Processing i 584\n",
      "Token Length--- 3879\n",
      "Processing i 585\n",
      "Token Length--- 10491\n",
      "Processing i 586\n",
      "Token Length--- 8819\n",
      "Processing i 587\n",
      "Token Length--- 4284\n",
      "Processing i 588\n",
      "Token Length--- 15972\n",
      "Processing i 589\n",
      "Token Length--- 6589\n",
      "Processing i 590\n",
      "Token Length--- 9747\n",
      "Processing i 591\n",
      "Token Length--- 57105\n",
      "Processing i 592\n",
      "Token Length--- 14267\n",
      "Processing i 593\n",
      "Token Length--- 13140\n",
      "Processing i 594\n",
      "Token Length--- 3297\n",
      "Processing i 595\n",
      "Token Length--- 12712\n",
      "Processing i 596\n",
      "Token Length--- 11905\n",
      "Processing i 597\n",
      "Token Length--- 2415\n",
      "Processing i 598\n",
      "Token Length--- 13565\n",
      "Processing i 599\n",
      "Token Length--- 8306\n",
      "Processing i 600\n",
      "Token Length--- 11794\n",
      "Processing i 601\n",
      "Token Length--- 17798\n",
      "Processing i 602\n",
      "Token Length--- 1740\n",
      "Processing i 603\n",
      "Token Length--- 1942\n",
      "Processing i 604\n",
      "Token Length--- 2880\n",
      "Processing i 605\n",
      "Token Length--- 12440\n",
      "Processing i 606\n",
      "Token Length--- 9519\n",
      "Processing i 607\n",
      "Token Length--- 1443\n",
      "Processing i 608\n",
      "Token Length--- 10972\n",
      "Processing i 609\n",
      "Token Length--- 20365\n",
      "Processing i 610\n",
      "Token Length--- 2667\n",
      "Processing i 611\n",
      "Token Length--- 2613\n",
      "Processing i 612\n",
      "Token Length--- 15839\n",
      "Processing i 613\n",
      "Token Length--- 2272\n",
      "Processing i 614\n",
      "Token Length--- 13080\n",
      "Processing i 615\n",
      "Token Length--- 5810\n",
      "Processing i 616\n",
      "Token Length--- 9910\n",
      "Processing i 617\n",
      "Token Length--- 2746\n",
      "Processing i 618\n",
      "Token Length--- 28136\n",
      "Processing i 619\n",
      "Token Length--- 2916\n",
      "Processing i 620\n",
      "Token Length--- 1441\n",
      "Processing i 621\n",
      "Token Length--- 5224\n",
      "Processing i 622\n",
      "Token Length--- 3078\n",
      "Processing i 623\n",
      "Token Length--- 13004\n",
      "Processing i 624\n",
      "Token Length--- 4229\n",
      "Processing i 625\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 8220\n",
      "Processing i 626\n",
      "Token Length--- 15135\n",
      "Processing i 627\n",
      "Token Length--- 14350\n",
      "Processing i 628\n",
      "Token Length--- 1783\n",
      "Processing i 629\n",
      "Token Length--- 2563\n",
      "Processing i 630\n",
      "Token Length--- 13590\n",
      "Processing i 631\n",
      "Token Length--- 19707\n",
      "Processing i 632\n",
      "Token Length--- 6011\n",
      "Processing i 633\n",
      "Token Length--- 1374\n",
      "Processing i 634\n",
      "Token Length--- 2176\n",
      "Processing i 635\n",
      "Token Length--- 8136\n",
      "Processing i 636\n",
      "Token Length--- 34811\n",
      "Processing i 637\n",
      "Token Length--- 4755\n",
      "Processing i 638\n",
      "Token Length--- 6005\n",
      "Processing i 639\n",
      "Token Length--- 13084\n",
      "Processing i 640\n",
      "Token Length--- 7393\n",
      "Processing i 641\n",
      "Token Length--- 3404\n",
      "Processing i 642\n",
      "Token Length--- 11938\n",
      "Processing i 643\n",
      "Token Length--- 19908\n",
      "Processing i 644\n",
      "Token Length--- 3240\n",
      "Processing i 645\n",
      "Token Length--- 14686\n",
      "Processing i 646\n",
      "Token Length--- 18894\n",
      "Processing i 647\n",
      "Token Length--- 5990\n",
      "Processing i 648\n",
      "Token Length--- 4126\n",
      "Processing i 649\n",
      "Token Length--- 32738\n",
      "Processing i 650\n",
      "Token Length--- 508\n",
      "Processing i 651\n",
      "Token Length--- 11567\n",
      "Processing i 652\n",
      "Token Length--- 6740\n",
      "Processing i 653\n",
      "Token Length--- 15090\n",
      "Processing i 654\n",
      "Token Length--- 12273\n",
      "Processing i 655\n",
      "Token Length--- 11503\n",
      "Processing i 656\n",
      "Token Length--- 5992\n",
      "Processing i 657\n",
      "Token Length--- 3272\n",
      "Processing i 658\n",
      "Token Length--- 2443\n",
      "Processing i 659\n",
      "Token Length--- 4382\n",
      "Processing i 660\n",
      "Token Length--- 1312\n",
      "Processing i 661\n",
      "Token Length--- 4911\n",
      "Processing i 662\n",
      "Token Length--- 8249\n",
      "Processing i 663\n",
      "Token Length--- 7114\n",
      "Processing i 664\n",
      "Token Length--- 23891\n",
      "Processing i 665\n",
      "Token Length--- 6064\n",
      "Processing i 666\n",
      "Token Length--- 826\n",
      "Processing i 667\n",
      "Token Length--- 18601\n",
      "Processing i 668\n",
      "Token Length--- 12515\n",
      "Processing i 669\n",
      "Token Length--- 1358\n",
      "Processing i 670\n",
      "Token Length--- 4034\n",
      "Processing i 671\n",
      "Token Length--- 1794\n",
      "Processing i 672\n",
      "Token Length--- 20208\n",
      "Processing i 673\n",
      "Token Length--- 17804\n",
      "Processing i 674\n",
      "Token Length--- 433\n",
      "Processing i 675\n",
      "Token Length--- 11590\n",
      "Processing i 676\n",
      "Token Length--- 14609\n",
      "Processing i 677\n",
      "Token Length--- 10144\n",
      "Processing i 678\n",
      "Token Length--- 10234\n",
      "Processing i 679\n",
      "Token Length--- 5392\n",
      "Processing i 680\n",
      "Token Length--- 4406\n",
      "Processing i 681\n",
      "Token Length--- 16560\n",
      "Processing i 682\n",
      "Token Length--- 23470\n",
      "Processing i 683\n",
      "Token Length--- 24004\n",
      "Processing i 684\n",
      "Token Length--- 6661\n",
      "Processing i 685\n",
      "Token Length--- 2478\n",
      "Processing i 686\n",
      "Token Length--- 31448\n",
      "Processing i 687\n",
      "Token Length--- 1319\n",
      "Processing i 688\n",
      "Token Length--- 1815\n",
      "Processing i 689\n",
      "Token Length--- 19952\n",
      "Processing i 690\n",
      "Token Length--- 9467\n",
      "Processing i 691\n",
      "Token Length--- 23096\n",
      "Processing i 692\n",
      "Token Length--- 18875\n",
      "Processing i 693\n",
      "Token Length--- 2022\n",
      "Processing i 694\n",
      "Token Length--- 2820\n",
      "Processing i 695\n",
      "Token Length--- 5179\n",
      "Processing i 696\n",
      "Token Length--- 2383\n",
      "Processing i 697\n",
      "Token Length--- 5299\n",
      "Processing i 698\n",
      "Token Length--- 1625\n",
      "Processing i 699\n",
      "Token Length--- 6040\n",
      "Processing i 700\n",
      "Token Length--- 11654\n",
      "Processing i 701\n",
      "Token Length--- 12808\n",
      "Processing i 702\n",
      "Token Length--- 1059\n",
      "Processing i 703\n",
      "Token Length--- 3426\n",
      "Processing i 704\n",
      "Token Length--- 3201\n",
      "Processing i 705\n",
      "Token Length--- 3675\n",
      "Processing i 706\n",
      "Token Length--- 24467\n",
      "Processing i 707\n",
      "Token Length--- 3538\n",
      "Processing i 708\n",
      "Token Length--- 11691\n",
      "Processing i 709\n",
      "Token Length--- 2385\n",
      "Processing i 710\n",
      "Token Length--- 8817\n",
      "Processing i 711\n",
      "Token Length--- 3435\n",
      "Processing i 712\n",
      "Token Length--- 15842\n",
      "Processing i 713\n",
      "Token Length--- 8777\n",
      "Processing i 714\n",
      "Token Length--- 7265\n",
      "Processing i 715\n",
      "Token Length--- 4202\n",
      "Processing i 716\n",
      "Token Length--- 4977\n",
      "Processing i 717\n",
      "Token Length--- 677\n",
      "Processing i 718\n",
      "Token Length--- 12040\n",
      "Processing i 719\n",
      "Token Length--- 6853\n",
      "Processing i 720\n",
      "Token Length--- 6652\n",
      "Processing i 721\n",
      "Token Length--- 1744\n",
      "Processing i 722\n",
      "Token Length--- 2308\n",
      "Processing i 723\n",
      "Token Length--- 1308\n",
      "Processing i 724\n",
      "Token Length--- 4063\n",
      "Processing i 725\n",
      "Token Length--- 3458\n",
      "Processing i 726\n",
      "Token Length--- 26655\n",
      "Processing i 727\n",
      "Token Length--- 4676\n",
      "Processing i 728\n",
      "Token Length--- 1649\n",
      "Processing i 729\n",
      "Token Length--- 27598\n",
      "Processing i 730\n",
      "Token Length--- 6875\n",
      "Processing i 731\n",
      "Token Length--- 7198\n",
      "Processing i 732\n",
      "Token Length--- 5629\n",
      "Processing i 733\n",
      "Token Length--- 5112\n",
      "Processing i 734\n",
      "Token Length--- 2197\n",
      "Processing i 735\n",
      "Token Length--- 4761\n",
      "Processing i 736\n",
      "Token Length--- 7062\n",
      "Processing i 737\n",
      "Token Length--- 3230\n",
      "Processing i 738\n",
      "Token Length--- 5993\n",
      "Processing i 739\n",
      "Token Length--- 3140\n",
      "Processing i 740\n",
      "Token Length--- 18018\n",
      "Processing i 741\n",
      "Token Length--- 1372\n",
      "Processing i 742\n",
      "Token Length--- 7374\n",
      "Processing i 743\n",
      "Token Length--- 17625\n",
      "Processing i 744\n",
      "Token Length--- 2635\n",
      "Processing i 745\n",
      "Token Length--- 2811\n",
      "Processing i 746\n",
      "Token Length--- 5899\n",
      "Processing i 747\n",
      "Token Length--- 4927\n",
      "Processing i 748\n",
      "Token Length--- 13874\n",
      "Processing i 749\n",
      "Token Length--- 1317\n",
      "Processing i 750\n",
      "Token Length--- 5464\n",
      "Processing i 751\n",
      "Token Length--- 8212\n",
      "Processing i 752\n",
      "Token Length--- 13528\n",
      "Processing i 753\n",
      "Token Length--- 2621\n",
      "Processing i 754\n",
      "Token Length--- 7960\n",
      "Processing i 755\n",
      "Token Length--- 14088\n",
      "Processing i 756\n",
      "Token Length--- 15476\n",
      "Processing i 757\n",
      "Token Length--- 2334\n",
      "Processing i 758\n",
      "Token Length--- 9006\n",
      "Processing i 759\n",
      "Token Length--- 6242\n",
      "Processing i 760\n",
      "Token Length--- 7233\n",
      "Processing i 761\n",
      "Token Length--- 3324\n",
      "Processing i 762\n",
      "Token Length--- 533\n",
      "Processing i 763\n",
      "Token Length--- 1375\n",
      "Processing i 764\n",
      "Token Length--- 7538\n",
      "Processing i 765\n",
      "Token Length--- 31236\n",
      "Processing i 766\n",
      "Token Length--- 4458\n",
      "Processing i 767\n",
      "Token Length--- 8743\n",
      "Processing i 768\n",
      "Token Length--- 10199\n",
      "Processing i 769\n",
      "Token Length--- 5551\n",
      "Processing i 770\n",
      "Token Length--- 14902\n",
      "Processing i 771\n",
      "Token Length--- 12988\n",
      "Processing i 772\n",
      "Token Length--- 6997\n",
      "Processing i 773\n",
      "Token Length--- 2010\n",
      "Processing i 774\n",
      "Token Length--- 9343\n",
      "Processing i 775\n",
      "Token Length--- 6144\n",
      "Processing i 776\n",
      "Token Length--- 6990\n",
      "Processing i 777\n",
      "Token Length--- 3085\n",
      "Processing i 778\n",
      "Token Length--- 12488\n",
      "Processing i 779\n",
      "Token Length--- 9107\n",
      "Processing i 780\n",
      "Token Length--- 3257\n",
      "Processing i 781\n",
      "Token Length--- 4553\n",
      "Processing i 782\n",
      "Token Length--- 5218\n",
      "Processing i 783\n",
      "Token Length--- 12302\n",
      "Processing i 784\n",
      "Token Length--- 18395\n",
      "Processing i 785\n",
      "Token Length--- 5785\n",
      "Processing i 786\n",
      "Token Length--- 11731\n",
      "Processing i 787\n",
      "Token Length--- 3511\n",
      "Processing i 788\n",
      "Token Length--- 3905\n",
      "Processing i 789\n",
      "Token Length--- 13082\n",
      "Processing i 790\n",
      "Token Length--- 35279\n",
      "Processing i 791\n",
      "Token Length--- 4994\n",
      "Processing i 792\n",
      "Token Length--- 11027\n",
      "Processing i 793\n",
      "Token Length--- 6273\n",
      "Processing i 794\n",
      "Token Length--- 5662\n",
      "Processing i 795\n",
      "Token Length--- 4596\n",
      "Processing i 796\n",
      "Token Length--- 6365\n",
      "Processing i 797\n",
      "Token Length--- 6398\n",
      "Processing i 798\n",
      "Token Length--- 12957\n",
      "Processing i 799\n",
      "Token Length--- 2495\n",
      "Processing i 800\n",
      "Token Length--- 7704\n",
      "Processing i 801\n",
      "Token Length--- 4456\n",
      "Processing i 802\n",
      "Token Length--- 1286\n",
      "Processing i 803\n",
      "Token Length--- 1489\n",
      "Processing i 804\n",
      "Token Length--- 7456\n",
      "Processing i 805\n",
      "Token Length--- 6449\n",
      "Processing i 806\n",
      "Token Length--- 2238\n",
      "Processing i 807\n",
      "Token Length--- 4678\n",
      "Processing i 808\n",
      "Token Length--- 3547\n",
      "Processing i 809\n",
      "Token Length--- 6845\n",
      "Processing i 810\n",
      "Token Length--- 19726\n",
      "Processing i 811\n",
      "Token Length--- 7357\n",
      "Processing i 812\n",
      "Token Length--- 7601\n",
      "Processing i 813\n",
      "Token Length--- 10405\n",
      "Processing i 814\n",
      "Token Length--- 1377\n",
      "Processing i 815\n",
      "Token Length--- 8266\n",
      "Processing i 816\n",
      "Token Length--- 3515\n",
      "Processing i 817\n",
      "Token Length--- 1328\n",
      "Processing i 818\n",
      "Token Length--- 4459\n",
      "Processing i 819\n",
      "Token Length--- 3382\n",
      "Processing i 820\n",
      "Token Length--- 4587\n",
      "Processing i 821\n",
      "Token Length--- 6906\n",
      "Processing i 822\n",
      "Token Length--- 4075\n",
      "Processing i 823\n",
      "Token Length--- 4424\n",
      "Processing i 824\n",
      "Token Length--- 3740\n",
      "Processing i 825\n",
      "Token Length--- 7744\n",
      "Processing i 826\n",
      "Token Length--- 15102\n",
      "Processing i 827\n",
      "Token Length--- 3994\n",
      "Processing i 828\n",
      "Token Length--- 12285\n",
      "Processing i 829\n",
      "Token Length--- 1688\n",
      "Processing i 830\n",
      "Token Length--- 11141\n",
      "Processing i 831\n",
      "Token Length--- 7789\n",
      "Processing i 832\n",
      "Token Length--- 7395\n",
      "Processing i 833\n",
      "Token Length--- 12137\n",
      "Processing i 834\n",
      "Token Length--- 2805\n",
      "Processing i 835\n",
      "Token Length--- 10727\n",
      "Processing i 836\n",
      "Token Length--- 2698\n",
      "Processing i 837\n",
      "Token Length--- 3179\n",
      "Processing i 838\n",
      "Token Length--- 5218\n",
      "Processing i 839\n",
      "Token Length--- 9932\n",
      "Processing i 840\n",
      "Token Length--- 10565\n",
      "Processing i 841\n",
      "Token Length--- 2311\n",
      "Processing i 842\n",
      "Token Length--- 8742\n",
      "Processing i 843\n",
      "Token Length--- 9565\n",
      "Processing i 844\n",
      "Token Length--- 1322\n",
      "Processing i 845\n",
      "Token Length--- 8491\n",
      "Processing i 846\n",
      "Token Length--- 7176\n",
      "Processing i 847\n",
      "Token Length--- 4596\n",
      "Processing i 848\n",
      "Token Length--- 10559\n",
      "Processing i 849\n",
      "Token Length--- 11478\n",
      "Processing i 850\n",
      "Token Length--- 10335\n",
      "Processing i 851\n",
      "Token Length--- 8829\n",
      "Processing i 852\n",
      "Token Length--- 4110\n",
      "Processing i 853\n",
      "Token Length--- 20801\n",
      "Processing i 854\n",
      "Token Length--- 2016\n",
      "Processing i 855\n",
      "Token Length--- 52098\n",
      "Processing i 856\n",
      "Token Length--- 5009\n",
      "Processing i 857\n",
      "Token Length--- 3263\n",
      "Processing i 858\n",
      "Token Length--- 7582\n",
      "Processing i 859\n",
      "Token Length--- 3628\n",
      "Processing i 860\n",
      "Token Length--- 3351\n",
      "Processing i 861\n",
      "Token Length--- 2426\n",
      "Processing i 862\n",
      "Token Length--- 2614\n",
      "Processing i 863\n",
      "Token Length--- 1154\n",
      "Processing i 864\n",
      "Token Length--- 1114\n",
      "Processing i 865\n",
      "Token Length--- 779\n",
      "Processing i 866\n",
      "Token Length--- 10341\n",
      "Processing i 867\n",
      "Token Length--- 1902\n",
      "Processing i 868\n",
      "Token Length--- 2823\n",
      "Processing i 869\n",
      "Token Length--- 6288\n",
      "Processing i 870\n",
      "Token Length--- 3036\n",
      "Processing i 871\n",
      "Token Length--- 26950\n",
      "Processing i 872\n",
      "Token Length--- 4378\n",
      "Processing i 873\n",
      "Token Length--- 6596\n",
      "Processing i 874\n",
      "Token Length--- 22101\n",
      "Processing i 875\n",
      "Token Length--- 6661\n",
      "Processing i 876\n",
      "Token Length--- 24801\n",
      "Processing i 877\n",
      "Token Length--- 2154\n",
      "Processing i 878\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 6035\n",
      "Processing i 879\n",
      "Token Length--- 13952\n",
      "Processing i 880\n",
      "Token Length--- 15884\n",
      "Processing i 881\n",
      "Token Length--- 2307\n",
      "Processing i 882\n",
      "Token Length--- 2984\n",
      "Processing i 883\n",
      "Token Length--- 974\n",
      "Processing i 884\n",
      "Token Length--- 6967\n",
      "Processing i 885\n",
      "Token Length--- 3192\n",
      "Processing i 886\n",
      "Token Length--- 16721\n",
      "Processing i 887\n",
      "Token Length--- 5468\n",
      "Processing i 888\n",
      "Token Length--- 4919\n",
      "Processing i 889\n",
      "Token Length--- 5864\n",
      "Processing i 890\n",
      "Token Length--- 2930\n",
      "Processing i 891\n",
      "Token Length--- 2702\n",
      "Processing i 892\n",
      "Token Length--- 22803\n",
      "Processing i 893\n",
      "Token Length--- 9936\n",
      "Processing i 894\n",
      "Token Length--- 5992\n",
      "Processing i 895\n",
      "Token Length--- 1086\n",
      "Processing i 896\n",
      "Token Length--- 7270\n",
      "Processing i 897\n",
      "Token Length--- 1812\n",
      "Processing i 898\n",
      "Token Length--- 8546\n",
      "Processing i 899\n",
      "Token Length--- 4598\n",
      "Processing i 900\n",
      "Token Length--- 3955\n",
      "Processing i 901\n",
      "Token Length--- 1476\n",
      "Processing i 902\n",
      "Token Length--- 2697\n",
      "Processing i 903\n",
      "Token Length--- 6676\n",
      "Processing i 904\n",
      "Token Length--- 4234\n",
      "Processing i 905\n",
      "Token Length--- 8877\n",
      "Processing i 906\n",
      "Token Length--- 34375\n",
      "Processing i 907\n",
      "Token Length--- 1862\n",
      "Processing i 908\n",
      "Token Length--- 6273\n",
      "Processing i 909\n",
      "Token Length--- 10498\n",
      "Processing i 910\n",
      "Token Length--- 2048\n",
      "Processing i 911\n",
      "Token Length--- 3036\n",
      "Processing i 912\n",
      "Token Length--- 13645\n",
      "Processing i 913\n",
      "Token Length--- 2566\n",
      "Processing i 914\n",
      "Token Length--- 3441\n",
      "Processing i 915\n",
      "Token Length--- 7857\n",
      "Processing i 916\n",
      "Token Length--- 6106\n",
      "Processing i 917\n",
      "Token Length--- 6089\n",
      "Processing i 918\n",
      "Token Length--- 5356\n",
      "Processing i 919\n",
      "Token Length--- 3874\n",
      "Processing i 920\n",
      "Token Length--- 7245\n",
      "Processing i 921\n",
      "Token Length--- 4646\n",
      "Processing i 922\n",
      "Token Length--- 11713\n",
      "Processing i 923\n",
      "Token Length--- 16890\n",
      "Processing i 924\n",
      "Token Length--- 3711\n",
      "Processing i 925\n",
      "Token Length--- 3226\n",
      "Processing i 926\n",
      "Token Length--- 2453\n",
      "Processing i 927\n",
      "Token Length--- 26174\n",
      "Processing i 928\n",
      "Token Length--- 5050\n",
      "Processing i 929\n",
      "Token Length--- 4583\n",
      "Processing i 930\n",
      "Token Length--- 6224\n",
      "Processing i 931\n",
      "Token Length--- 3274\n",
      "Processing i 932\n",
      "Token Length--- 26967\n",
      "Processing i 933\n",
      "Token Length--- 1596\n",
      "Processing i 934\n",
      "Token Length--- 3694\n",
      "Processing i 935\n",
      "Token Length--- 35316\n",
      "Processing i 936\n",
      "Token Length--- 19418\n",
      "Processing i 937\n",
      "Token Length--- 8377\n",
      "Processing i 938\n",
      "Token Length--- 4344\n",
      "Processing i 939\n",
      "Token Length--- 1800\n",
      "Processing i 940\n",
      "Token Length--- 2595\n",
      "Processing i 941\n",
      "Token Length--- 899\n",
      "Processing i 942\n",
      "Token Length--- 9874\n",
      "Processing i 943\n",
      "Token Length--- 6400\n",
      "Processing i 944\n",
      "Token Length--- 9140\n",
      "Processing i 945\n",
      "Token Length--- 29082\n",
      "Processing i 946\n",
      "Token Length--- 14506\n",
      "Processing i 947\n",
      "Token Length--- 4559\n",
      "Processing i 948\n",
      "Token Length--- 10623\n",
      "Processing i 949\n",
      "Token Length--- 19081\n",
      "Processing i 950\n",
      "Token Length--- 31729\n",
      "Processing i 951\n",
      "Token Length--- 5638\n",
      "Processing i 952\n",
      "Token Length--- 3664\n",
      "Processing i 953\n",
      "Token Length--- 15262\n",
      "Processing i 954\n",
      "Token Length--- 23144\n",
      "Processing i 955\n",
      "Token Length--- 11102\n",
      "Processing i 956\n",
      "Token Length--- 3075\n",
      "Processing i 957\n",
      "Token Length--- 3409\n",
      "Processing i 958\n",
      "Token Length--- 24753\n",
      "Processing i 959\n",
      "Token Length--- 3625\n",
      "Processing i 960\n",
      "Token Length--- 25623\n",
      "Processing i 961\n",
      "Token Length--- 15535\n",
      "Processing i 962\n",
      "Token Length--- 8513\n",
      "Processing i 963\n",
      "Token Length--- 10631\n",
      "Processing i 964\n",
      "Token Length--- 7710\n",
      "Processing i 965\n",
      "Token Length--- 2046\n",
      "Processing i 966\n",
      "Token Length--- 12156\n",
      "Processing i 967\n",
      "Token Length--- 12657\n",
      "Processing i 968\n",
      "Token Length--- 7517\n",
      "Processing i 969\n",
      "Token Length--- 7872\n",
      "Processing i 970\n",
      "Token Length--- 13446\n",
      "Processing i 971\n",
      "Token Length--- 33396\n",
      "Processing i 972\n",
      "Token Length--- 21692\n",
      "Processing i 973\n",
      "Token Length--- 9529\n",
      "Processing i 974\n",
      "Token Length--- 14352\n",
      "Processing i 975\n",
      "Token Length--- 2518\n",
      "Processing i 976\n",
      "Token Length--- 6043\n",
      "Processing i 977\n",
      "Token Length--- 2630\n",
      "Processing i 978\n",
      "Token Length--- 9345\n",
      "Processing i 979\n",
      "Token Length--- 9755\n",
      "Processing i 980\n",
      "Token Length--- 3991\n",
      "Processing i 981\n",
      "Token Length--- 6608\n",
      "Processing i 982\n",
      "Token Length--- 996\n",
      "Processing i 983\n",
      "Token Length--- 2908\n",
      "Processing i 984\n",
      "Token Length--- 18821\n",
      "Processing i 985\n",
      "Token Length--- 2002\n",
      "Processing i 986\n",
      "Token Length--- 7048\n",
      "Processing i 987\n",
      "Token Length--- 3048\n",
      "Processing i 988\n",
      "Token Length--- 8748\n",
      "Processing i 989\n",
      "Token Length--- 13827\n",
      "Processing i 990\n",
      "Token Length--- 5308\n",
      "Processing i 991\n",
      "Token Length--- 6556\n",
      "Processing i 992\n",
      "Token Length--- 1697\n",
      "Processing i 993\n",
      "Token Length--- 4065\n",
      "Processing i 994\n",
      "Token Length--- 744\n",
      "Processing i 995\n",
      "Token Length--- 3540\n",
      "Processing i 996\n",
      "Token Length--- 9750\n",
      "Processing i 997\n",
      "Token Length--- 13254\n",
      "Processing i 998\n",
      "Token Length--- 9387\n",
      "Processing i 999\n",
      "Token Length--- 25330\n",
      "Processing i 1000\n",
      "Token Length--- 3172\n",
      "Processing i 1001\n",
      "Token Length--- 21701\n",
      "Processing i 1002\n",
      "Token Length--- 2097\n",
      "Processing i 1003\n",
      "Token Length--- 23500\n",
      "Processing i 1004\n",
      "Token Length--- 2874\n",
      "Processing i 1005\n",
      "Token Length--- 43958\n",
      "Processing i 1006\n",
      "Token Length--- 3759\n",
      "Processing i 1007\n",
      "Token Length--- 7501\n",
      "Processing i 1008\n",
      "Token Length--- 3543\n",
      "Processing i 1009\n",
      "Token Length--- 2682\n",
      "Processing i 1010\n",
      "Token Length--- 9417\n",
      "Processing i 1011\n",
      "Token Length--- 12655\n",
      "Processing i 1012\n",
      "Token Length--- 18075\n",
      "Processing i 1013\n",
      "Token Length--- 1999\n",
      "Processing i 1014\n",
      "Token Length--- 4567\n",
      "Processing i 1015\n",
      "Token Length--- 28461\n",
      "Processing i 1016\n",
      "Token Length--- 7987\n",
      "Processing i 1017\n",
      "Token Length--- 33055\n",
      "Processing i 1018\n",
      "Token Length--- 8180\n",
      "Processing i 1019\n",
      "Token Length--- 9359\n",
      "Processing i 1020\n",
      "Token Length--- 18968\n",
      "Processing i 1021\n",
      "Token Length--- 5549\n",
      "Processing i 1022\n",
      "Token Length--- 7547\n",
      "Processing i 1023\n",
      "Token Length--- 3012\n",
      "Processing i 1024\n",
      "Token Length--- 35310\n",
      "Processing i 1025\n",
      "Token Length--- 3702\n",
      "Processing i 1026\n",
      "Token Length--- 19162\n",
      "Processing i 1027\n",
      "Token Length--- 12170\n",
      "Processing i 1028\n",
      "Token Length--- 18218\n",
      "Processing i 1029\n",
      "Token Length--- 30599\n",
      "Processing i 1030\n",
      "Token Length--- 24008\n",
      "Processing i 1031\n",
      "Token Length--- 3771\n",
      "Processing i 1032\n",
      "Token Length--- 4773\n",
      "Processing i 1033\n",
      "Token Length--- 2593\n",
      "Processing i 1034\n",
      "Token Length--- 25603\n",
      "Processing i 1035\n",
      "Token Length--- 4116\n",
      "Processing i 1036\n",
      "Token Length--- 6556\n",
      "Processing i 1037\n",
      "Token Length--- 8509\n",
      "Processing i 1038\n",
      "Token Length--- 1177\n",
      "Processing i 1039\n",
      "Token Length--- 1867\n",
      "Processing i 1040\n",
      "Token Length--- 10154\n",
      "Processing i 1041\n",
      "Token Length--- 3698\n",
      "Processing i 1042\n",
      "Token Length--- 15880\n",
      "Processing i 1043\n",
      "Token Length--- 7221\n",
      "Processing i 1044\n",
      "Token Length--- 5841\n",
      "Processing i 1045\n",
      "Token Length--- 22532\n",
      "Processing i 1046\n",
      "Token Length--- 6803\n",
      "Processing i 1047\n",
      "Token Length--- 4328\n",
      "Processing i 1048\n",
      "Token Length--- 12853\n",
      "Processing i 1049\n",
      "Token Length--- 2624\n",
      "Processing i 1050\n",
      "Token Length--- 7324\n",
      "Processing i 1051\n",
      "Token Length--- 4488\n",
      "Processing i 1052\n",
      "Token Length--- 12630\n",
      "Processing i 1053\n",
      "Token Length--- 3578\n",
      "Processing i 1054\n",
      "Token Length--- 4456\n",
      "Processing i 1055\n",
      "Token Length--- 13728\n",
      "Processing i 1056\n",
      "Token Length--- 1900\n",
      "Processing i 1057\n",
      "Token Length--- 8329\n",
      "Processing i 1058\n",
      "Token Length--- 6371\n",
      "Processing i 1059\n",
      "Token Length--- 3158\n",
      "Processing i 1060\n",
      "Token Length--- 6430\n",
      "Processing i 1061\n",
      "Token Length--- 13950\n",
      "Processing i 1062\n",
      "Token Length--- 1882\n",
      "Processing i 1063\n",
      "Token Length--- 3131\n",
      "Processing i 1064\n",
      "Token Length--- 7309\n",
      "Processing i 1065\n",
      "Token Length--- 9539\n",
      "Processing i 1066\n",
      "Token Length--- 19144\n",
      "Processing i 1067\n",
      "Token Length--- 3508\n",
      "Processing i 1068\n",
      "Token Length--- 41089\n",
      "Processing i 1069\n",
      "Token Length--- 8175\n",
      "Processing i 1070\n",
      "Token Length--- 5349\n",
      "Processing i 1071\n",
      "Token Length--- 3827\n",
      "Processing i 1072\n",
      "Token Length--- 6783\n",
      "Processing i 1073\n",
      "Token Length--- 1199\n",
      "Processing i 1074\n",
      "Token Length--- 8428\n",
      "Processing i 1075\n",
      "Token Length--- 6444\n",
      "Processing i 1076\n",
      "Token Length--- 3819\n",
      "Processing i 1077\n",
      "Token Length--- 1533\n",
      "Processing i 1078\n",
      "Token Length--- 1229\n",
      "Processing i 1079\n",
      "Token Length--- 23221\n",
      "Processing i 1080\n",
      "Token Length--- 8537\n",
      "Processing i 1081\n",
      "Token Length--- 660\n",
      "Processing i 1082\n",
      "Token Length--- 4985\n",
      "Processing i 1083\n",
      "Token Length--- 7569\n",
      "Processing i 1084\n",
      "Token Length--- 12663\n",
      "Processing i 1085\n",
      "Token Length--- 8746\n",
      "Processing i 1086\n",
      "Token Length--- 8073\n",
      "Processing i 1087\n",
      "Token Length--- 2760\n",
      "Processing i 1088\n",
      "Token Length--- 1664\n",
      "Processing i 1089\n",
      "Token Length--- 1638\n",
      "Processing i 1090\n",
      "Token Length--- 3558\n",
      "Processing i 1091\n",
      "Token Length--- 6463\n",
      "Processing i 1092\n",
      "Token Length--- 10695\n",
      "Processing i 1093\n",
      "Token Length--- 3669\n",
      "Processing i 1094\n",
      "Token Length--- 19361\n",
      "Processing i 1095\n",
      "Token Length--- 12312\n",
      "Processing i 1096\n",
      "Token Length--- 1686\n",
      "Processing i 1097\n",
      "Token Length--- 10768\n",
      "Processing i 1098\n",
      "Token Length--- 4347\n",
      "Processing i 1099\n",
      "Token Length--- 17171\n",
      "Processing i 1100\n",
      "Token Length--- 20610\n",
      "Processing i 1101\n",
      "Token Length--- 2754\n",
      "Processing i 1102\n",
      "Token Length--- 8798\n",
      "Processing i 1103\n",
      "Token Length--- 5288\n",
      "Processing i 1104\n",
      "Token Length--- 12573\n",
      "Processing i 1105\n",
      "Token Length--- 1405\n",
      "Processing i 1106\n",
      "Token Length--- 28465\n",
      "Processing i 1107\n",
      "Token Length--- 10118\n",
      "Processing i 1108\n",
      "Token Length--- 3271\n",
      "Processing i 1109\n",
      "Token Length--- 6489\n",
      "Processing i 1110\n",
      "Token Length--- 13144\n",
      "Processing i 1111\n",
      "Token Length--- 12398\n",
      "Processing i 1112\n",
      "Token Length--- 516\n",
      "Processing i 1113\n",
      "Token Length--- 23634\n",
      "Processing i 1114\n",
      "Token Length--- 20110\n",
      "Processing i 1115\n",
      "Token Length--- 5476\n",
      "Processing i 1116\n",
      "Token Length--- 16216\n",
      "Processing i 1117\n",
      "Token Length--- 6905\n",
      "Processing i 1118\n",
      "Token Length--- 9718\n",
      "Processing i 1119\n",
      "Token Length--- 2603\n",
      "Processing i 1120\n",
      "Token Length--- 21306\n",
      "Processing i 1121\n",
      "Token Length--- 23892\n",
      "Processing i 1122\n",
      "Token Length--- 12185\n",
      "Processing i 1123\n",
      "Token Length--- 15548\n",
      "Processing i 1124\n",
      "Token Length--- 3156\n",
      "Processing i 1125\n",
      "Token Length--- 9834\n",
      "Processing i 1126\n",
      "Token Length--- 11211\n",
      "Processing i 1127\n",
      "Token Length--- 7355\n",
      "Processing i 1128\n",
      "Token Length--- 5128\n",
      "Processing i 1129\n",
      "Token Length--- 24926\n",
      "Processing i 1130\n",
      "Token Length--- 8474\n",
      "Processing i 1131\n",
      "Token Length--- 1277\n",
      "Processing i 1132\n",
      "Token Length--- 8710\n",
      "Processing i 1133\n",
      "Token Length--- 3630\n",
      "Processing i 1134\n",
      "Token Length--- 22269\n",
      "Processing i 1135\n",
      "Token Length--- 1896\n",
      "Processing i 1136\n",
      "Token Length--- 2387\n",
      "Processing i 1137\n",
      "Token Length--- 17618\n",
      "Processing i 1138\n",
      "Token Length--- 2462\n",
      "Processing i 1139\n",
      "Token Length--- 9112\n",
      "Processing i 1140\n",
      "Token Length--- 2994\n",
      "Processing i 1141\n",
      "Token Length--- 3483\n",
      "Processing i 1142\n",
      "Token Length--- 1396\n",
      "Processing i 1143\n",
      "Token Length--- 31110\n",
      "Processing i 1144\n",
      "Token Length--- 1170\n",
      "Processing i 1145\n",
      "Token Length--- 7155\n",
      "Processing i 1146\n",
      "Token Length--- 35517\n",
      "Processing i 1147\n",
      "Token Length--- 5892\n",
      "Processing i 1148\n",
      "Token Length--- 3669\n",
      "Processing i 1149\n",
      "Token Length--- 4193\n",
      "Processing i 1150\n",
      "Token Length--- 9185\n",
      "Processing i 1151\n",
      "Token Length--- 6442\n",
      "Processing i 1152\n",
      "Token Length--- 10560\n",
      "Processing i 1153\n",
      "Token Length--- 4887\n",
      "Processing i 1154\n",
      "Token Length--- 2507\n",
      "Processing i 1155\n",
      "Token Length--- 16860\n",
      "Processing i 1156\n",
      "Token Length--- 896\n",
      "Processing i 1157\n",
      "Token Length--- 16846\n",
      "Processing i 1158\n",
      "Token Length--- 2585\n",
      "Processing i 1159\n",
      "Token Length--- 1913\n",
      "Processing i 1160\n",
      "Token Length--- 2125\n",
      "Processing i 1161\n",
      "Token Length--- 14598\n",
      "Processing i 1162\n",
      "Token Length--- 15671\n",
      "Processing i 1163\n",
      "Token Length--- 4459\n",
      "Processing i 1164\n",
      "Token Length--- 13101\n",
      "Processing i 1165\n",
      "Token Length--- 8546\n",
      "Processing i 1166\n",
      "Token Length--- 8908\n",
      "Processing i 1167\n",
      "Token Length--- 3399\n",
      "Processing i 1168\n",
      "Token Length--- 3904\n",
      "Processing i 1169\n",
      "Token Length--- 9176\n",
      "Processing i 1170\n",
      "Token Length--- 17279\n",
      "Processing i 1171\n",
      "Token Length--- 1248\n",
      "Processing i 1172\n",
      "Token Length--- 17987\n",
      "Processing i 1173\n",
      "Token Length--- 1852\n",
      "Processing i 1174\n",
      "Token Length--- 4473\n",
      "Processing i 1175\n",
      "Token Length--- 12235\n",
      "Processing i 1176\n",
      "Token Length--- 8801\n",
      "Processing i 1177\n",
      "Token Length--- 613\n",
      "Processing i 1178\n",
      "Token Length--- 9271\n",
      "Processing i 1179\n",
      "Token Length--- 8711\n",
      "Processing i 1180\n",
      "Token Length--- 2402\n",
      "Processing i 1181\n",
      "Token Length--- 4497\n",
      "Processing i 1182\n",
      "Token Length--- 13964\n",
      "Processing i 1183\n",
      "Token Length--- 8648\n",
      "Processing i 1184\n",
      "Token Length--- 3704\n",
      "Processing i 1185\n",
      "Token Length--- 18924\n",
      "Processing i 1186\n",
      "Token Length--- 12464\n",
      "Processing i 1187\n",
      "Token Length--- 4232\n",
      "Processing i 1188\n",
      "Token Length--- 2593\n",
      "Processing i 1189\n",
      "Token Length--- 8206\n",
      "Processing i 1190\n",
      "Token Length--- 3107\n",
      "Processing i 1191\n",
      "Token Length--- 3542\n",
      "Processing i 1192\n",
      "Token Length--- 8652\n",
      "Processing i 1193\n",
      "Token Length--- 5645\n",
      "Processing i 1194\n",
      "Token Length--- 6857\n",
      "Processing i 1195\n",
      "Token Length--- 3370\n",
      "Processing i 1196\n",
      "Token Length--- 39160\n",
      "Processing i 1197\n",
      "Token Length--- 6615\n",
      "Processing i 1198\n",
      "Token Length--- 10403\n",
      "Processing i 1199\n",
      "Token Length--- 1088\n",
      "Processing i 1200\n",
      "Token Length--- 8437\n",
      "Processing i 1201\n",
      "Token Length--- 1724\n",
      "Processing i 1202\n",
      "Token Length--- 5971\n",
      "Processing i 1203\n",
      "Token Length--- 16717\n",
      "Processing i 1204\n",
      "Token Length--- 820\n",
      "Processing i 1205\n",
      "Token Length--- 8344\n",
      "Processing i 1206\n",
      "Token Length--- 15597\n",
      "Processing i 1207\n",
      "Token Length--- 4587\n",
      "Processing i 1208\n",
      "Token Length--- 3988\n",
      "Processing i 1209\n",
      "Token Length--- 871\n",
      "Processing i 1210\n",
      "Token Length--- 16615\n",
      "Processing i 1211\n",
      "Token Length--- 2352\n",
      "Processing i 1212\n",
      "Token Length--- 4094\n",
      "Processing i 1213\n",
      "Token Length--- 19063\n",
      "Processing i 1214\n",
      "Token Length--- 32173\n",
      "Processing i 1215\n",
      "Token Length--- 15276\n",
      "Processing i 1216\n",
      "Token Length--- 12064\n",
      "Processing i 1217\n",
      "Token Length--- 2972\n",
      "Processing i 1218\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 11198\n",
      "Processing i 1219\n",
      "Token Length--- 9145\n",
      "Processing i 1220\n",
      "Token Length--- 4560\n",
      "Processing i 1221\n",
      "Token Length--- 6805\n",
      "Processing i 1222\n",
      "Token Length--- 4749\n",
      "Processing i 1223\n",
      "Token Length--- 4230\n",
      "Processing i 1224\n",
      "Token Length--- 10044\n",
      "Processing i 1225\n",
      "Token Length--- 368\n",
      "Processing i 1226\n",
      "Token Length--- 5714\n",
      "Processing i 1227\n",
      "Token Length--- 15219\n",
      "Processing i 1228\n",
      "Token Length--- 1799\n",
      "Processing i 1229\n",
      "Token Length--- 14767\n",
      "Processing i 1230\n",
      "Token Length--- 16594\n",
      "Processing i 1231\n",
      "Token Length--- 18230\n",
      "Processing i 1232\n",
      "Token Length--- 10444\n",
      "Processing i 1233\n",
      "Token Length--- 9480\n",
      "Processing i 1234\n",
      "Token Length--- 1542\n",
      "Processing i 1235\n",
      "Token Length--- 4844\n",
      "Processing i 1236\n",
      "Token Length--- 5273\n",
      "Processing i 1237\n",
      "Token Length--- 3741\n",
      "Processing i 1238\n",
      "Token Length--- 1201\n",
      "Processing i 1239\n",
      "Token Length--- 1696\n",
      "Processing i 1240\n",
      "Token Length--- 24931\n",
      "Processing i 1241\n",
      "Token Length--- 3911\n",
      "Processing i 1242\n",
      "Token Length--- 11967\n",
      "Processing i 1243\n",
      "Token Length--- 4324\n",
      "Processing i 1244\n",
      "Token Length--- 7595\n",
      "Processing i 1245\n",
      "Token Length--- 15377\n",
      "Processing i 1246\n",
      "Token Length--- 2978\n",
      "Processing i 1247\n",
      "Token Length--- 6321\n",
      "Processing i 1248\n",
      "Token Length--- 5785\n",
      "Processing i 1249\n",
      "Token Length--- 10246\n",
      "Processing i 1250\n",
      "Token Length--- 7719\n",
      "Processing i 1251\n",
      "Token Length--- 4432\n",
      "Processing i 1252\n",
      "Token Length--- 7745\n",
      "Processing i 1253\n",
      "Token Length--- 5611\n",
      "Processing i 1254\n",
      "Token Length--- 6540\n",
      "Processing i 1255\n",
      "Token Length--- 7644\n",
      "Processing i 1256\n",
      "Token Length--- 6821\n",
      "Processing i 1257\n",
      "Token Length--- 10045\n",
      "Processing i 1258\n",
      "Token Length--- 10840\n",
      "Processing i 1259\n",
      "Token Length--- 11305\n",
      "Processing i 1260\n",
      "Token Length--- 2303\n",
      "Processing i 1261\n",
      "Token Length--- 1135\n",
      "Processing i 1262\n",
      "Token Length--- 19684\n",
      "Processing i 1263\n",
      "Token Length--- 8370\n",
      "Processing i 1264\n",
      "Token Length--- 8210\n",
      "Processing i 1265\n",
      "Token Length--- 9073\n",
      "Processing i 1266\n",
      "Token Length--- 14581\n",
      "Processing i 1267\n",
      "Token Length--- 27296\n",
      "Processing i 1268\n",
      "Token Length--- 14818\n",
      "Processing i 1269\n",
      "Token Length--- 7262\n",
      "Processing i 1270\n",
      "Token Length--- 12151\n",
      "Processing i 1271\n",
      "Token Length--- 17910\n",
      "Processing i 1272\n",
      "Token Length--- 4123\n",
      "Processing i 1273\n",
      "Token Length--- 6871\n",
      "Processing i 1274\n",
      "Token Length--- 3716\n",
      "Processing i 1275\n",
      "Token Length--- 3716\n",
      "Processing i 1276\n",
      "Token Length--- 5518\n",
      "Processing i 1277\n",
      "Token Length--- 8409\n",
      "Processing i 1278\n",
      "Token Length--- 21271\n",
      "Processing i 1279\n",
      "Token Length--- 35925\n",
      "Processing i 1280\n",
      "Token Length--- 5257\n",
      "Processing i 1281\n",
      "Token Length--- 6795\n",
      "Processing i 1282\n",
      "Token Length--- 15984\n",
      "Processing i 1283\n",
      "Token Length--- 3197\n",
      "Processing i 1284\n",
      "Token Length--- 3500\n",
      "Processing i 1285\n",
      "Token Length--- 11542\n",
      "Processing i 1286\n",
      "Token Length--- 2083\n",
      "Processing i 1287\n",
      "Token Length--- 6643\n",
      "Processing i 1288\n",
      "Token Length--- 4022\n",
      "Processing i 1289\n",
      "Token Length--- 731\n",
      "Processing i 1290\n",
      "Token Length--- 6681\n",
      "Processing i 1291\n",
      "Token Length--- 22627\n",
      "Processing i 1292\n",
      "Token Length--- 12550\n",
      "Processing i 1293\n",
      "Token Length--- 11227\n",
      "Processing i 1294\n",
      "Token Length--- 2237\n",
      "Processing i 1295\n",
      "Token Length--- 931\n",
      "Processing i 1296\n",
      "Token Length--- 17871\n",
      "Processing i 1297\n",
      "Token Length--- 9112\n",
      "Processing i 1298\n",
      "Token Length--- 7565\n",
      "Processing i 1299\n",
      "Token Length--- 1407\n",
      "Processing i 1300\n",
      "Token Length--- 17215\n",
      "Processing i 1301\n",
      "Token Length--- 29852\n",
      "Processing i 1302\n",
      "Token Length--- 4168\n",
      "Processing i 1303\n",
      "Token Length--- 2096\n",
      "Processing i 1304\n",
      "Token Length--- 11773\n",
      "Processing i 1305\n",
      "Token Length--- 12159\n",
      "Processing i 1306\n",
      "Token Length--- 18557\n",
      "Processing i 1307\n",
      "Token Length--- 14390\n",
      "Processing i 1308\n",
      "Token Length--- 1131\n",
      "Processing i 1309\n",
      "Token Length--- 21051\n",
      "Processing i 1310\n",
      "Token Length--- 38727\n",
      "Processing i 1311\n",
      "Token Length--- 3029\n",
      "Processing i 1312\n",
      "Token Length--- 22404\n",
      "Processing i 1313\n",
      "Token Length--- 1778\n",
      "Processing i 1314\n",
      "Token Length--- 31069\n",
      "Processing i 1315\n",
      "Token Length--- 3569\n",
      "Processing i 1316\n",
      "Token Length--- 2477\n",
      "Processing i 1317\n",
      "Token Length--- 5617\n",
      "Processing i 1318\n",
      "Token Length--- 26410\n",
      "Processing i 1319\n",
      "Token Length--- 22249\n",
      "Processing i 1320\n",
      "Token Length--- 7591\n",
      "Processing i 1321\n",
      "Token Length--- 8436\n",
      "Processing i 1322\n",
      "Token Length--- 15620\n",
      "Processing i 1323\n",
      "Token Length--- 5525\n",
      "Processing i 1324\n",
      "Token Length--- 5760\n",
      "Processing i 1325\n",
      "Token Length--- 12169\n",
      "Processing i 1326\n",
      "Token Length--- 8484\n",
      "Processing i 1327\n",
      "Token Length--- 12704\n",
      "Processing i 1328\n",
      "Token Length--- 6561\n",
      "Processing i 1329\n",
      "Token Length--- 6203\n",
      "Processing i 1330\n",
      "Token Length--- 2225\n",
      "Processing i 1331\n",
      "Token Length--- 13185\n",
      "Processing i 1332\n",
      "Token Length--- 9342\n",
      "Processing i 1333\n",
      "Token Length--- 5220\n",
      "Processing i 1334\n",
      "Token Length--- 4986\n",
      "Processing i 1335\n",
      "Token Length--- 8579\n",
      "Processing i 1336\n",
      "Token Length--- 4651\n",
      "Processing i 1337\n",
      "Token Length--- 7911\n",
      "Processing i 1338\n",
      "Token Length--- 4397\n",
      "Processing i 1339\n",
      "Token Length--- 5404\n",
      "Processing i 1340\n",
      "Token Length--- 7957\n",
      "Processing i 1341\n",
      "Token Length--- 3222\n",
      "Processing i 1342\n",
      "Token Length--- 1339\n",
      "Processing i 1343\n",
      "Token Length--- 21843\n",
      "Processing i 1344\n",
      "Token Length--- 13855\n",
      "Processing i 1345\n",
      "Token Length--- 14426\n",
      "Processing i 1346\n",
      "Token Length--- 3377\n",
      "Processing i 1347\n",
      "Token Length--- 1176\n",
      "Processing i 1348\n",
      "Token Length--- 17884\n",
      "Processing i 1349\n",
      "Token Length--- 1395\n",
      "Processing i 1350\n",
      "Token Length--- 14385\n",
      "Processing i 1351\n",
      "Token Length--- 5229\n",
      "Processing i 1352\n",
      "Token Length--- 19478\n",
      "Processing i 1353\n",
      "Token Length--- 4202\n",
      "Processing i 1354\n",
      "Token Length--- 3511\n",
      "Processing i 1355\n",
      "Token Length--- 832\n",
      "Processing i 1356\n",
      "Token Length--- 35425\n",
      "Processing i 1357\n",
      "Token Length--- 2220\n",
      "Processing i 1358\n",
      "Token Length--- 1793\n",
      "Processing i 1359\n",
      "Token Length--- 30036\n",
      "Processing i 1360\n",
      "Token Length--- 4802\n",
      "Processing i 1361\n",
      "Token Length--- 12366\n",
      "Processing i 1362\n",
      "Token Length--- 11678\n",
      "Processing i 1363\n",
      "Token Length--- 14234\n",
      "Processing i 1364\n",
      "Token Length--- 3237\n",
      "Processing i 1365\n",
      "Token Length--- 6994\n",
      "Processing i 1366\n",
      "Token Length--- 17630\n",
      "Processing i 1367\n",
      "Token Length--- 1682\n",
      "Processing i 1368\n",
      "Token Length--- 3383\n",
      "Processing i 1369\n",
      "Token Length--- 21410\n",
      "Processing i 1370\n",
      "Token Length--- 542\n",
      "Processing i 1371\n",
      "Token Length--- 18202\n",
      "Processing i 1372\n",
      "Token Length--- 6286\n",
      "Processing i 1373\n",
      "Token Length--- 8111\n",
      "Processing i 1374\n",
      "Token Length--- 25579\n",
      "Processing i 1375\n",
      "Token Length--- 4866\n",
      "Processing i 1376\n",
      "Token Length--- 3939\n",
      "Processing i 1377\n",
      "Token Length--- 4490\n",
      "Processing i 1378\n",
      "Token Length--- 1607\n",
      "Processing i 1379\n",
      "Token Length--- 7082\n",
      "Processing i 1380\n",
      "Token Length--- 31452\n",
      "Processing i 1381\n",
      "Token Length--- 13906\n",
      "Processing i 1382\n",
      "Token Length--- 10577\n",
      "Processing i 1383\n",
      "Token Length--- 30730\n",
      "Processing i 1384\n",
      "Token Length--- 7696\n",
      "Processing i 1385\n",
      "Token Length--- 1510\n",
      "Processing i 1386\n",
      "Token Length--- 1694\n",
      "Processing i 1387\n",
      "Token Length--- 7403\n",
      "Processing i 1388\n",
      "Token Length--- 5974\n",
      "Processing i 1389\n",
      "Token Length--- 1139\n",
      "Processing i 1390\n",
      "Token Length--- 14136\n",
      "Processing i 1391\n",
      "Token Length--- 12183\n",
      "Processing i 1392\n",
      "Token Length--- 2892\n",
      "Processing i 1393\n",
      "Token Length--- 6452\n",
      "Processing i 1394\n",
      "Token Length--- 1860\n",
      "Processing i 1395\n",
      "Token Length--- 16743\n",
      "Processing i 1396\n",
      "Token Length--- 2518\n",
      "Processing i 1397\n",
      "Token Length--- 15110\n",
      "Processing i 1398\n",
      "Token Length--- 4114\n",
      "Processing i 1399\n",
      "Token Length--- 4728\n",
      "Processing i 1400\n",
      "Token Length--- 10488\n",
      "Processing i 1401\n",
      "Token Length--- 2924\n",
      "Processing i 1402\n",
      "Token Length--- 4239\n",
      "Processing i 1403\n",
      "Token Length--- 25697\n",
      "Processing i 1404\n",
      "Token Length--- 2922\n",
      "Processing i 1405\n",
      "Token Length--- 2609\n",
      "Processing i 1406\n",
      "Token Length--- 12536\n",
      "Processing i 1407\n",
      "Token Length--- 4437\n",
      "Processing i 1408\n",
      "Token Length--- 29525\n",
      "Processing i 1409\n",
      "Token Length--- 9541\n",
      "Processing i 1410\n",
      "Token Length--- 5956\n",
      "Processing i 1411\n",
      "Token Length--- 6880\n",
      "Processing i 1412\n",
      "Token Length--- 11758\n",
      "Processing i 1413\n",
      "Token Length--- 3140\n",
      "Processing i 1414\n",
      "Token Length--- 7200\n",
      "Processing i 1415\n",
      "Token Length--- 1317\n",
      "Processing i 1416\n",
      "Token Length--- 4747\n",
      "Processing i 1417\n",
      "Token Length--- 40907\n",
      "Processing i 1418\n",
      "Token Length--- 6275\n",
      "Processing i 1419\n",
      "Token Length--- 1096\n",
      "Processing i 1420\n",
      "Token Length--- 3132\n",
      "Processing i 1421\n",
      "Token Length--- 3763\n",
      "Processing i 1422\n",
      "Token Length--- 12188\n",
      "Processing i 1423\n",
      "Token Length--- 6956\n",
      "Processing i 1424\n",
      "Token Length--- 8464\n",
      "Processing i 1425\n",
      "Token Length--- 16666\n",
      "Processing i 1426\n",
      "Token Length--- 4145\n",
      "Processing i 1427\n",
      "Token Length--- 19243\n",
      "Processing i 1428\n",
      "Token Length--- 3324\n",
      "Processing i 1429\n",
      "Token Length--- 22672\n",
      "Processing i 1430\n",
      "Token Length--- 4235\n",
      "Processing i 1431\n",
      "Token Length--- 17640\n",
      "Processing i 1432\n",
      "Token Length--- 6905\n",
      "Processing i 1433\n",
      "Token Length--- 10101\n",
      "Processing i 1434\n",
      "Token Length--- 12758\n",
      "Processing i 1435\n",
      "Token Length--- 6778\n",
      "Processing i 1436\n",
      "Token Length--- 28824\n",
      "Processing i 1437\n",
      "Token Length--- 10811\n",
      "Processing i 1438\n",
      "Token Length--- 2793\n",
      "Processing i 1439\n",
      "Token Length--- 8486\n",
      "Processing i 1440\n",
      "Token Length--- 2061\n",
      "Processing i 1441\n",
      "Token Length--- 13786\n",
      "Processing i 1442\n",
      "Token Length--- 10092\n",
      "Processing i 1443\n",
      "Token Length--- 6292\n",
      "Processing i 1444\n",
      "Token Length--- 1706\n",
      "Processing i 1445\n",
      "Token Length--- 17423\n",
      "Processing i 1446\n",
      "Token Length--- 5141\n",
      "Processing i 1447\n",
      "Token Length--- 23225\n",
      "Processing i 1448\n",
      "Token Length--- 26175\n",
      "Processing i 1449\n",
      "Token Length--- 1856\n",
      "Processing i 1450\n",
      "Token Length--- 2960\n",
      "Processing i 1451\n",
      "Token Length--- 5404\n",
      "Processing i 1452\n",
      "Token Length--- 3159\n",
      "Processing i 1453\n",
      "Token Length--- 9297\n",
      "Processing i 1454\n",
      "Token Length--- 1799\n",
      "Processing i 1455\n",
      "Token Length--- 6646\n",
      "Processing i 1456\n",
      "Token Length--- 8510\n",
      "Processing i 1457\n",
      "Token Length--- 2613\n",
      "Processing i 1458\n",
      "Token Length--- 10388\n",
      "Processing i 1459\n",
      "Token Length--- 15842\n",
      "Processing i 1460\n",
      "Token Length--- 6484\n",
      "Processing i 1461\n",
      "Token Length--- 13366\n",
      "Processing i 1462\n",
      "Token Length--- 2974\n",
      "Processing i 1463\n",
      "Token Length--- 3519\n",
      "Processing i 1464\n",
      "Token Length--- 8307\n",
      "Processing i 1465\n",
      "Token Length--- 5424\n",
      "Processing i 1466\n",
      "Token Length--- 3005\n",
      "Processing i 1467\n",
      "Token Length--- 5852\n",
      "Processing i 1468\n",
      "Token Length--- 18048\n",
      "Processing i 1469\n",
      "Token Length--- 12096\n",
      "Processing i 1470\n",
      "Token Length--- 38165\n",
      "Processing i 1471\n",
      "Token Length--- 4763\n",
      "Processing i 1472\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 44519\n",
      "Processing i 1473\n",
      "Token Length--- 12993\n",
      "Processing i 1474\n",
      "Token Length--- 12759\n",
      "Processing i 1475\n",
      "Token Length--- 12124\n",
      "Processing i 1476\n",
      "Token Length--- 6657\n",
      "Processing i 1477\n",
      "Token Length--- 610\n",
      "Processing i 1478\n",
      "Token Length--- 8979\n",
      "Processing i 1479\n",
      "Token Length--- 11914\n",
      "Processing i 1480\n",
      "Token Length--- 1191\n",
      "Processing i 1481\n",
      "Token Length--- 15172\n",
      "Processing i 1482\n",
      "Token Length--- 7419\n",
      "Processing i 1483\n",
      "Token Length--- 31122\n",
      "Processing i 1484\n",
      "Token Length--- 5507\n",
      "Processing i 1485\n",
      "Token Length--- 8110\n",
      "Processing i 1486\n",
      "Token Length--- 17517\n",
      "Processing i 1487\n",
      "Token Length--- 8047\n",
      "Processing i 1488\n",
      "Token Length--- 3594\n",
      "Processing i 1489\n",
      "Token Length--- 29948\n",
      "Processing i 1490\n",
      "Token Length--- 4285\n",
      "Processing i 1491\n",
      "Token Length--- 3141\n",
      "Processing i 1492\n",
      "Token Length--- 11338\n",
      "Processing i 1493\n",
      "Token Length--- 14681\n",
      "Processing i 1494\n",
      "Token Length--- 3920\n",
      "Processing i 1495\n",
      "Token Length--- 2096\n",
      "Processing i 1496\n",
      "Token Length--- 3891\n",
      "Processing i 1497\n",
      "Token Length--- 3096\n",
      "Processing i 1498\n",
      "Token Length--- 6744\n",
      "Processing i 1499\n",
      "Token Length--- 12896\n",
      "Processing i 1500\n",
      "Token Length--- 1179\n",
      "Processing i 1501\n",
      "Token Length--- 1203\n",
      "Processing i 1502\n",
      "Token Length--- 1975\n",
      "Processing i 1503\n",
      "Token Length--- 18777\n",
      "Processing i 1504\n",
      "Token Length--- 10030\n",
      "Processing i 1505\n",
      "Token Length--- 12167\n",
      "Processing i 1506\n",
      "Token Length--- 5185\n",
      "Processing i 1507\n",
      "Token Length--- 11920\n",
      "Processing i 1508\n",
      "Token Length--- 8798\n",
      "Processing i 1509\n",
      "Token Length--- 12104\n",
      "Processing i 1510\n",
      "Token Length--- 10344\n",
      "Processing i 1511\n",
      "Token Length--- 12466\n",
      "Processing i 1512\n",
      "Token Length--- 13397\n",
      "Processing i 1513\n",
      "Token Length--- 13816\n",
      "Processing i 1514\n",
      "Token Length--- 10987\n",
      "Processing i 1515\n",
      "Token Length--- 10540\n",
      "Processing i 1516\n",
      "Token Length--- 7583\n",
      "Processing i 1517\n",
      "Token Length--- 3615\n",
      "Processing i 1518\n",
      "Token Length--- 3818\n",
      "Processing i 1519\n",
      "Token Length--- 3893\n",
      "Processing i 1520\n",
      "Token Length--- 3021\n",
      "Processing i 1521\n",
      "Token Length--- 32649\n",
      "Processing i 1522\n",
      "Token Length--- 7689\n",
      "Processing i 1523\n",
      "Token Length--- 25839\n",
      "Processing i 1524\n",
      "Token Length--- 1750\n",
      "Processing i 1525\n",
      "Token Length--- 9852\n",
      "Processing i 1526\n",
      "Token Length--- 2217\n",
      "Processing i 1527\n",
      "Token Length--- 2482\n",
      "Processing i 1528\n",
      "Token Length--- 19158\n",
      "Processing i 1529\n",
      "Token Length--- 6155\n",
      "Processing i 1530\n",
      "Token Length--- 39921\n",
      "Processing i 1531\n",
      "Token Length--- 4514\n",
      "Processing i 1532\n",
      "Token Length--- 3810\n",
      "Processing i 1533\n",
      "Token Length--- 12071\n",
      "Processing i 1534\n",
      "Token Length--- 8274\n",
      "Processing i 1535\n",
      "Token Length--- 14517\n",
      "Processing i 1536\n",
      "Token Length--- 9593\n",
      "Processing i 1537\n",
      "Token Length--- 7094\n",
      "Processing i 1538\n",
      "Token Length--- 8903\n",
      "Processing i 1539\n",
      "Token Length--- 15554\n",
      "Processing i 1540\n",
      "Token Length--- 7803\n",
      "Processing i 1541\n",
      "Token Length--- 8728\n",
      "Processing i 1542\n",
      "Token Length--- 2395\n",
      "Processing i 1543\n",
      "Token Length--- 11201\n",
      "Processing i 1544\n",
      "Token Length--- 2213\n",
      "Processing i 1545\n",
      "Token Length--- 11480\n",
      "Processing i 1546\n",
      "Token Length--- 4370\n",
      "Processing i 1547\n",
      "Token Length--- 7569\n",
      "Processing i 1548\n",
      "Token Length--- 6447\n",
      "Processing i 1549\n",
      "Token Length--- 1689\n",
      "Processing i 1550\n",
      "Token Length--- 5951\n",
      "Processing i 1551\n",
      "Token Length--- 3904\n",
      "Processing i 1552\n",
      "Token Length--- 17310\n",
      "Processing i 1553\n",
      "Token Length--- 12473\n",
      "Processing i 1554\n",
      "Token Length--- 3890\n",
      "Processing i 1555\n",
      "Token Length--- 6925\n",
      "Processing i 1556\n",
      "Token Length--- 5429\n",
      "Processing i 1557\n",
      "Token Length--- 2594\n",
      "Processing i 1558\n",
      "Token Length--- 3586\n",
      "Processing i 1559\n",
      "Token Length--- 6092\n",
      "Processing i 1560\n",
      "Token Length--- 6590\n",
      "Processing i 1561\n",
      "Token Length--- 479\n",
      "Processing i 1562\n",
      "Token Length--- 4997\n",
      "Processing i 1563\n",
      "Token Length--- 5534\n",
      "Processing i 1564\n",
      "Token Length--- 5607\n",
      "Processing i 1565\n",
      "Token Length--- 17826\n",
      "Processing i 1566\n",
      "Token Length--- 11650\n",
      "Processing i 1567\n",
      "Token Length--- 9727\n",
      "Processing i 1568\n",
      "Token Length--- 9759\n",
      "Processing i 1569\n",
      "Token Length--- 791\n",
      "Processing i 1570\n",
      "Token Length--- 7339\n",
      "Processing i 1571\n",
      "Token Length--- 3890\n",
      "Processing i 1572\n",
      "Token Length--- 9337\n",
      "Processing i 1573\n",
      "Token Length--- 12120\n",
      "Processing i 1574\n",
      "Token Length--- 17609\n",
      "Processing i 1575\n",
      "Token Length--- 8448\n",
      "Processing i 1576\n",
      "Token Length--- 16248\n",
      "Processing i 1577\n",
      "Token Length--- 3930\n",
      "Processing i 1578\n",
      "Token Length--- 4545\n",
      "Processing i 1579\n",
      "Token Length--- 6119\n",
      "Processing i 1580\n",
      "Token Length--- 10840\n",
      "Processing i 1581\n",
      "Token Length--- 23433\n",
      "Processing i 1582\n",
      "Token Length--- 7796\n",
      "Processing i 1583\n",
      "Token Length--- 17043\n",
      "Processing i 1584\n",
      "Token Length--- 11430\n",
      "Processing i 1585\n",
      "Token Length--- 17041\n",
      "Processing i 1586\n",
      "Token Length--- 2146\n",
      "Processing i 1587\n",
      "Token Length--- 1610\n",
      "Processing i 1588\n",
      "Token Length--- 18169\n",
      "Processing i 1589\n",
      "Token Length--- 910\n",
      "Processing i 1590\n",
      "Token Length--- 19629\n",
      "Processing i 1591\n",
      "Token Length--- 6903\n",
      "Processing i 1592\n",
      "Token Length--- 18529\n",
      "Processing i 1593\n",
      "Token Length--- 4933\n",
      "Processing i 1594\n",
      "Token Length--- 17779\n",
      "Processing i 1595\n",
      "Token Length--- 6241\n",
      "Processing i 1596\n",
      "Token Length--- 8844\n",
      "Processing i 1597\n",
      "Token Length--- 4491\n",
      "Processing i 1598\n",
      "Token Length--- 4590\n",
      "Processing i 1599\n",
      "Token Length--- 2731\n",
      "Processing i 1600\n",
      "Token Length--- 2059\n",
      "Processing i 1601\n",
      "Token Length--- 3793\n",
      "Processing i 1602\n",
      "Token Length--- 18082\n",
      "Processing i 1603\n",
      "Token Length--- 471\n",
      "Processing i 1604\n",
      "Token Length--- 14243\n",
      "Processing i 1605\n",
      "Token Length--- 4543\n",
      "Processing i 1606\n",
      "Token Length--- 2407\n",
      "Processing i 1607\n",
      "Token Length--- 1786\n",
      "Processing i 1608\n",
      "Token Length--- 10804\n",
      "Processing i 1609\n",
      "Token Length--- 25460\n",
      "Processing i 1610\n",
      "Token Length--- 6824\n",
      "Processing i 1611\n",
      "Token Length--- 21956\n",
      "Processing i 1612\n",
      "Token Length--- 2057\n",
      "Processing i 1613\n",
      "Token Length--- 4997\n",
      "Processing i 1614\n",
      "Token Length--- 4932\n",
      "Processing i 1615\n",
      "Token Length--- 17839\n",
      "Processing i 1616\n",
      "Token Length--- 2737\n",
      "Processing i 1617\n",
      "Token Length--- 12409\n",
      "Processing i 1618\n",
      "Token Length--- 51260\n",
      "Processing i 1619\n",
      "Token Length--- 16362\n",
      "Processing i 1620\n",
      "Token Length--- 1688\n",
      "Processing i 1621\n",
      "Token Length--- 4321\n",
      "Processing i 1622\n",
      "Token Length--- 5097\n",
      "Processing i 1623\n",
      "Token Length--- 6924\n",
      "Processing i 1624\n",
      "Token Length--- 14690\n",
      "Processing i 1625\n",
      "Token Length--- 3209\n",
      "Processing i 1626\n",
      "Token Length--- 18225\n",
      "Processing i 1627\n",
      "Token Length--- 2900\n",
      "Processing i 1628\n",
      "Token Length--- 6531\n",
      "Processing i 1629\n",
      "Token Length--- 2135\n",
      "Processing i 1630\n",
      "Token Length--- 5350\n",
      "Processing i 1631\n",
      "Token Length--- 16752\n",
      "Processing i 1632\n",
      "Token Length--- 5380\n",
      "Processing i 1633\n",
      "Token Length--- 19948\n",
      "Processing i 1634\n",
      "Token Length--- 4943\n",
      "Processing i 1635\n",
      "Token Length--- 8351\n",
      "Processing i 1636\n",
      "Token Length--- 3719\n",
      "Processing i 1637\n",
      "Token Length--- 3796\n",
      "Processing i 1638\n",
      "Token Length--- 18804\n",
      "Processing i 1639\n",
      "Token Length--- 3799\n",
      "Processing i 1640\n",
      "Token Length--- 1076\n",
      "Processing i 1641\n",
      "Token Length--- 5573\n",
      "Processing i 1642\n",
      "Token Length--- 1857\n",
      "Processing i 1643\n",
      "Token Length--- 2025\n",
      "Processing i 1644\n",
      "Token Length--- 1991\n",
      "Processing i 1645\n",
      "Token Length--- 5096\n",
      "Processing i 1646\n",
      "Token Length--- 4025\n",
      "Processing i 1647\n",
      "Token Length--- 5372\n",
      "Processing i 1648\n",
      "Token Length--- 12188\n",
      "Processing i 1649\n",
      "Token Length--- 5723\n",
      "Processing i 1650\n",
      "Token Length--- 15373\n",
      "Processing i 1651\n",
      "Token Length--- 19744\n",
      "Processing i 1652\n",
      "Token Length--- 17309\n",
      "Processing i 1653\n",
      "Token Length--- 6510\n",
      "Processing i 1654\n",
      "Token Length--- 3121\n",
      "Processing i 1655\n",
      "Token Length--- 5929\n",
      "Processing i 1656\n",
      "Token Length--- 13631\n",
      "Processing i 1657\n",
      "Token Length--- 7127\n",
      "Processing i 1658\n",
      "Token Length--- 15984\n",
      "Processing i 1659\n",
      "Token Length--- 5016\n",
      "Processing i 1660\n",
      "Token Length--- 16166\n",
      "Processing i 1661\n",
      "Token Length--- 38851\n",
      "Processing i 1662\n",
      "Token Length--- 3136\n",
      "Processing i 1663\n",
      "Token Length--- 7630\n",
      "Processing i 1664\n",
      "Token Length--- 2856\n",
      "Processing i 1665\n",
      "Token Length--- 6971\n",
      "Processing i 1666\n",
      "Token Length--- 6966\n",
      "Processing i 1667\n",
      "Token Length--- 14346\n",
      "Processing i 1668\n",
      "Token Length--- 14464\n",
      "Processing i 1669\n",
      "Token Length--- 5959\n",
      "Processing i 1670\n",
      "Token Length--- 16211\n",
      "Processing i 1671\n",
      "Token Length--- 777\n",
      "Processing i 1672\n",
      "Token Length--- 16462\n",
      "Processing i 1673\n",
      "Token Length--- 14344\n",
      "Processing i 1674\n",
      "Token Length--- 7161\n",
      "Processing i 1675\n",
      "Token Length--- 10522\n",
      "Processing i 1676\n",
      "Token Length--- 9150\n",
      "Processing i 1677\n",
      "Token Length--- 4816\n",
      "Processing i 1678\n",
      "Token Length--- 2659\n",
      "Processing i 1679\n",
      "Token Length--- 2355\n",
      "Processing i 1680\n",
      "Token Length--- 6163\n",
      "Processing i 1681\n",
      "Token Length--- 4863\n",
      "Processing i 1682\n",
      "Token Length--- 7004\n",
      "Processing i 1683\n",
      "Token Length--- 9127\n",
      "Processing i 1684\n",
      "Token Length--- 26060\n",
      "Processing i 1685\n",
      "Token Length--- 9446\n",
      "Processing i 1686\n",
      "Token Length--- 12126\n",
      "Processing i 1687\n",
      "Token Length--- 22798\n",
      "Processing i 1688\n",
      "Token Length--- 1693\n",
      "Processing i 1689\n",
      "Token Length--- 13021\n",
      "Processing i 1690\n",
      "Token Length--- 4865\n",
      "Processing i 1691\n",
      "Token Length--- 4626\n",
      "Processing i 1692\n",
      "Token Length--- 4520\n",
      "Processing i 1693\n",
      "Token Length--- 1814\n",
      "Processing i 1694\n",
      "Token Length--- 10841\n",
      "Processing i 1695\n",
      "Token Length--- 9721\n",
      "Processing i 1696\n",
      "Token Length--- 10945\n",
      "Processing i 1697\n",
      "Token Length--- 15757\n",
      "Processing i 1698\n",
      "Token Length--- 6057\n",
      "Processing i 1699\n",
      "Token Length--- 3304\n",
      "Processing i 1700\n",
      "Token Length--- 19347\n",
      "Processing i 1701\n",
      "Token Length--- 990\n",
      "Processing i 1702\n",
      "Token Length--- 16818\n",
      "Processing i 1703\n",
      "Token Length--- 27843\n",
      "Processing i 1704\n",
      "Token Length--- 16782\n",
      "Processing i 1705\n",
      "Token Length--- 14706\n",
      "Processing i 1706\n",
      "Token Length--- 18237\n",
      "Processing i 1707\n",
      "Token Length--- 11669\n",
      "Processing i 1708\n",
      "Token Length--- 6015\n",
      "Processing i 1709\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 11766\n",
      "Processing i 1710\n",
      "Token Length--- 4021\n",
      "Processing i 1711\n",
      "Token Length--- 4899\n",
      "Processing i 1712\n",
      "Token Length--- 11087\n",
      "Processing i 1713\n",
      "Token Length--- 2972\n",
      "Processing i 1714\n",
      "Token Length--- 21181\n",
      "Processing i 1715\n",
      "Token Length--- 10931\n",
      "Processing i 1716\n",
      "Token Length--- 3684\n",
      "Processing i 1717\n",
      "Token Length--- 5541\n",
      "Processing i 1718\n",
      "Token Length--- 3676\n",
      "Processing i 1719\n",
      "Token Length--- 2329\n",
      "Processing i 1720\n",
      "Token Length--- 6883\n",
      "Processing i 1721\n",
      "Token Length--- 17871\n",
      "Processing i 1722\n",
      "Token Length--- 9791\n",
      "Processing i 1723\n",
      "Token Length--- 13273\n",
      "Processing i 1724\n",
      "Token Length--- 2124\n",
      "Processing i 1725\n",
      "Token Length--- 18467\n",
      "Processing i 1726\n",
      "Token Length--- 10821\n",
      "Processing i 1727\n",
      "Token Length--- 19652\n",
      "Processing i 1728\n",
      "Token Length--- 449\n",
      "Processing i 1729\n",
      "Token Length--- 16488\n",
      "Processing i 1730\n",
      "Token Length--- 2545\n",
      "Processing i 1731\n",
      "Token Length--- 6511\n",
      "Processing i 1732\n",
      "Token Length--- 6329\n",
      "Processing i 1733\n",
      "Token Length--- 8539\n",
      "Processing i 1734\n",
      "Token Length--- 20140\n",
      "Processing i 1735\n",
      "Token Length--- 7896\n",
      "Processing i 1736\n",
      "Token Length--- 18719\n",
      "Processing i 1737\n",
      "Token Length--- 10065\n",
      "Processing i 1738\n",
      "Token Length--- 8379\n",
      "Processing i 1739\n",
      "Token Length--- 10548\n",
      "Processing i 1740\n",
      "Token Length--- 4805\n",
      "Processing i 1741\n",
      "Token Length--- 32136\n",
      "Processing i 1742\n",
      "Token Length--- 16222\n",
      "Processing i 1743\n",
      "Token Length--- 5912\n",
      "Processing i 1744\n",
      "Token Length--- 2961\n",
      "Processing i 1745\n",
      "Token Length--- 6013\n",
      "Processing i 1746\n",
      "Token Length--- 8177\n",
      "Processing i 1747\n",
      "Token Length--- 12014\n",
      "Processing i 1748\n",
      "Token Length--- 1691\n",
      "Processing i 1749\n",
      "Token Length--- 17867\n",
      "Processing i 1750\n",
      "Token Length--- 18303\n",
      "Processing i 1751\n",
      "Token Length--- 844\n",
      "Processing i 1752\n",
      "Token Length--- 1990\n",
      "Processing i 1753\n",
      "Token Length--- 2186\n",
      "Processing i 1754\n",
      "Token Length--- 2239\n",
      "Processing i 1755\n",
      "Token Length--- 2947\n",
      "Processing i 1756\n",
      "Token Length--- 2146\n",
      "Processing i 1757\n",
      "Token Length--- 4281\n",
      "Processing i 1758\n",
      "Token Length--- 5426\n",
      "Processing i 1759\n",
      "Token Length--- 4615\n",
      "Processing i 1760\n",
      "Token Length--- 6438\n",
      "Processing i 1761\n",
      "Token Length--- 16879\n",
      "Processing i 1762\n",
      "Token Length--- 12212\n",
      "Processing i 1763\n",
      "Token Length--- 7580\n",
      "Processing i 1764\n",
      "Token Length--- 16040\n",
      "Processing i 1765\n",
      "Token Length--- 11320\n",
      "Processing i 1766\n",
      "Token Length--- 5980\n",
      "Processing i 1767\n",
      "Token Length--- 17555\n",
      "Processing i 1768\n",
      "Token Length--- 12984\n",
      "Processing i 1769\n",
      "Token Length--- 2736\n",
      "Processing i 1770\n",
      "Token Length--- 13035\n",
      "Processing i 1771\n",
      "Token Length--- 828\n",
      "Processing i 1772\n",
      "Token Length--- 10154\n",
      "Processing i 1773\n",
      "Token Length--- 3264\n",
      "Processing i 1774\n",
      "Token Length--- 1727\n",
      "Processing i 1775\n",
      "Token Length--- 419\n",
      "Processing i 1776\n",
      "Token Length--- 2797\n",
      "Processing i 1777\n",
      "Token Length--- 16407\n",
      "Processing i 1778\n",
      "Token Length--- 4988\n",
      "Processing i 1779\n",
      "Token Length--- 15271\n",
      "Processing i 1780\n",
      "Token Length--- 5495\n",
      "Processing i 1781\n",
      "Token Length--- 7832\n",
      "Processing i 1782\n",
      "Token Length--- 3843\n",
      "Processing i 1783\n",
      "Token Length--- 855\n",
      "Processing i 1784\n",
      "Token Length--- 5456\n",
      "Processing i 1785\n",
      "Token Length--- 4218\n",
      "Processing i 1786\n",
      "Token Length--- 5131\n",
      "Processing i 1787\n",
      "Token Length--- 5464\n",
      "Processing i 1788\n",
      "Token Length--- 977\n",
      "Processing i 1789\n",
      "Token Length--- 2651\n",
      "Processing i 1790\n",
      "Token Length--- 20332\n",
      "Processing i 1791\n",
      "Token Length--- 14666\n",
      "Processing i 1792\n",
      "Token Length--- 1461\n",
      "Processing i 1793\n",
      "Token Length--- 6969\n",
      "Processing i 1794\n",
      "Token Length--- 5309\n",
      "Processing i 1795\n",
      "Token Length--- 3006\n",
      "Processing i 1796\n",
      "Token Length--- 4097\n",
      "Processing i 1797\n",
      "Token Length--- 10272\n",
      "Processing i 1798\n",
      "Token Length--- 4447\n",
      "Processing i 1799\n",
      "Token Length--- 7514\n",
      "Processing i 1800\n",
      "Token Length--- 22895\n",
      "Processing i 1801\n",
      "Token Length--- 24554\n",
      "Processing i 1802\n",
      "Token Length--- 5184\n",
      "Processing i 1803\n",
      "Token Length--- 5340\n",
      "Processing i 1804\n",
      "Token Length--- 886\n",
      "Processing i 1805\n",
      "Token Length--- 5460\n",
      "Processing i 1806\n",
      "Token Length--- 2084\n",
      "Processing i 1807\n",
      "Token Length--- 4405\n",
      "Processing i 1808\n",
      "Token Length--- 6100\n",
      "Processing i 1809\n",
      "Token Length--- 8951\n",
      "Processing i 1810\n",
      "Token Length--- 10774\n",
      "Processing i 1811\n",
      "Token Length--- 21777\n",
      "Processing i 1812\n",
      "Token Length--- 13679\n",
      "Processing i 1813\n",
      "Token Length--- 3284\n",
      "Processing i 1814\n",
      "Token Length--- 4639\n",
      "Processing i 1815\n",
      "Token Length--- 10325\n",
      "Processing i 1816\n",
      "Token Length--- 9507\n",
      "Processing i 1817\n",
      "Token Length--- 13045\n",
      "Processing i 1818\n",
      "Token Length--- 13693\n",
      "Processing i 1819\n",
      "Token Length--- 18138\n",
      "Processing i 1820\n",
      "Token Length--- 9476\n",
      "Processing i 1821\n",
      "Token Length--- 24593\n",
      "Processing i 1822\n",
      "Token Length--- 4937\n",
      "Processing i 1823\n",
      "Token Length--- 13553\n",
      "Processing i 1824\n",
      "Token Length--- 4943\n",
      "Processing i 1825\n",
      "Token Length--- 9341\n",
      "Processing i 1826\n",
      "Token Length--- 5218\n",
      "Processing i 1827\n",
      "Token Length--- 11836\n",
      "Processing i 1828\n",
      "Token Length--- 7405\n",
      "Processing i 1829\n",
      "Token Length--- 2273\n",
      "Processing i 1830\n",
      "Token Length--- 5934\n",
      "Processing i 1831\n",
      "Token Length--- 21127\n",
      "Processing i 1832\n",
      "Token Length--- 1248\n",
      "Processing i 1833\n",
      "Token Length--- 2940\n",
      "Processing i 1834\n",
      "Token Length--- 2543\n",
      "Processing i 1835\n",
      "Token Length--- 22015\n",
      "Processing i 1836\n",
      "Token Length--- 3943\n",
      "Processing i 1837\n",
      "Token Length--- 4287\n",
      "Processing i 1838\n",
      "Token Length--- 11762\n",
      "Processing i 1839\n",
      "Token Length--- 28756\n",
      "Processing i 1840\n",
      "Token Length--- 2128\n",
      "Processing i 1841\n",
      "Token Length--- 3729\n",
      "Processing i 1842\n",
      "Token Length--- 8144\n",
      "Processing i 1843\n",
      "Token Length--- 7110\n",
      "Processing i 1844\n",
      "Token Length--- 2694\n",
      "Processing i 1845\n",
      "Token Length--- 1164\n",
      "Processing i 1846\n",
      "Token Length--- 1588\n",
      "Processing i 1847\n",
      "Token Length--- 2827\n",
      "Processing i 1848\n",
      "Token Length--- 1949\n",
      "Processing i 1849\n",
      "Token Length--- 2115\n",
      "Processing i 1850\n",
      "Token Length--- 1152\n",
      "Processing i 1851\n",
      "Token Length--- 30078\n",
      "Processing i 1852\n",
      "Token Length--- 3316\n",
      "Processing i 1853\n",
      "Token Length--- 2315\n",
      "Processing i 1854\n",
      "Token Length--- 6061\n",
      "Processing i 1855\n",
      "Token Length--- 19490\n",
      "Processing i 1856\n",
      "Token Length--- 11035\n",
      "Processing i 1857\n",
      "Token Length--- 1943\n",
      "Processing i 1858\n",
      "Token Length--- 2964\n",
      "Processing i 1859\n",
      "Token Length--- 19893\n",
      "Processing i 1860\n",
      "Token Length--- 1674\n",
      "Processing i 1861\n",
      "Token Length--- 4399\n",
      "Processing i 1862\n",
      "Token Length--- 24554\n",
      "Processing i 1863\n",
      "Token Length--- 7120\n",
      "Processing i 1864\n",
      "Token Length--- 6214\n",
      "Processing i 1865\n",
      "Token Length--- 19371\n",
      "Processing i 1866\n",
      "Token Length--- 8899\n",
      "Processing i 1867\n",
      "Token Length--- 1547\n",
      "Processing i 1868\n",
      "Token Length--- 9039\n",
      "Processing i 1869\n",
      "Token Length--- 10115\n",
      "Processing i 1870\n",
      "Token Length--- 5139\n",
      "Processing i 1871\n",
      "Token Length--- 2059\n",
      "Processing i 1872\n",
      "Token Length--- 3313\n",
      "Processing i 1873\n",
      "Token Length--- 6608\n",
      "Processing i 1874\n",
      "Token Length--- 4001\n",
      "Processing i 1875\n",
      "Token Length--- 15923\n",
      "Processing i 1876\n",
      "Token Length--- 13175\n",
      "Processing i 1877\n",
      "Token Length--- 6454\n",
      "Processing i 1878\n",
      "Token Length--- 5586\n",
      "Processing i 1879\n",
      "Token Length--- 32743\n",
      "Processing i 1880\n",
      "Token Length--- 6619\n",
      "Processing i 1881\n",
      "Token Length--- 2796\n",
      "Processing i 1882\n",
      "Token Length--- 22757\n",
      "Processing i 1883\n",
      "Token Length--- 16254\n",
      "Processing i 1884\n",
      "Token Length--- 8464\n",
      "Processing i 1885\n",
      "Token Length--- 14674\n",
      "Processing i 1886\n",
      "Token Length--- 2939\n",
      "Processing i 1887\n",
      "Token Length--- 3115\n",
      "Processing i 1888\n",
      "Token Length--- 5873\n",
      "Processing i 1889\n",
      "Token Length--- 20590\n",
      "Processing i 1890\n",
      "Token Length--- 23396\n",
      "Processing i 1891\n",
      "Token Length--- 6638\n",
      "Processing i 1892\n",
      "Token Length--- 3373\n",
      "Processing i 1893\n",
      "Token Length--- 9301\n",
      "Processing i 1894\n",
      "Token Length--- 15157\n",
      "Processing i 1895\n",
      "Token Length--- 6960\n",
      "Processing i 1896\n",
      "Token Length--- 4514\n",
      "Processing i 1897\n",
      "Token Length--- 15304\n",
      "Processing i 1898\n",
      "Token Length--- 3256\n",
      "Processing i 1899\n",
      "Token Length--- 8355\n",
      "Processing i 1900\n",
      "Token Length--- 3503\n",
      "Processing i 1901\n",
      "Token Length--- 11586\n",
      "Processing i 1902\n",
      "Token Length--- 23276\n",
      "Processing i 1903\n",
      "Token Length--- 2491\n",
      "Processing i 1904\n",
      "Token Length--- 12909\n",
      "Processing i 1905\n",
      "Token Length--- 10458\n",
      "Processing i 1906\n",
      "Token Length--- 25690\n",
      "Processing i 1907\n",
      "Token Length--- 5409\n",
      "Processing i 1908\n",
      "Token Length--- 18848\n",
      "Processing i 1909\n",
      "Token Length--- 3861\n",
      "Processing i 1910\n",
      "Token Length--- 1873\n",
      "Processing i 1911\n",
      "Token Length--- 1070\n",
      "Processing i 1912\n",
      "Token Length--- 10353\n",
      "Processing i 1913\n",
      "Token Length--- 4601\n",
      "Processing i 1914\n",
      "Token Length--- 11757\n",
      "Processing i 1915\n",
      "Token Length--- 7006\n",
      "Processing i 1916\n",
      "Token Length--- 33130\n",
      "Processing i 1917\n",
      "Token Length--- 44814\n",
      "Processing i 1918\n",
      "Token Length--- 3066\n",
      "Processing i 1919\n",
      "Token Length--- 15051\n",
      "Processing i 1920\n",
      "Token Length--- 2339\n",
      "Processing i 1921\n",
      "Token Length--- 648\n",
      "Processing i 1922\n",
      "Token Length--- 3529\n",
      "Processing i 1923\n",
      "Token Length--- 3291\n",
      "Processing i 1924\n",
      "Token Length--- 19924\n",
      "Processing i 1925\n",
      "Token Length--- 502\n",
      "Processing i 1926\n",
      "Token Length--- 12841\n",
      "Processing i 1927\n",
      "Token Length--- 3380\n",
      "Processing i 1928\n",
      "Token Length--- 1662\n",
      "Processing i 1929\n",
      "Token Length--- 29108\n",
      "Processing i 1930\n",
      "Token Length--- 5333\n",
      "Processing i 1931\n",
      "Token Length--- 15615\n",
      "Processing i 1932\n",
      "Token Length--- 2055\n",
      "Processing i 1933\n",
      "Token Length--- 19369\n",
      "Processing i 1934\n",
      "Token Length--- 1560\n",
      "Processing i 1935\n",
      "Token Length--- 4708\n",
      "Processing i 1936\n",
      "Token Length--- 6043\n",
      "Processing i 1937\n",
      "Token Length--- 2827\n",
      "Processing i 1938\n",
      "Token Length--- 14608\n",
      "Processing i 1939\n",
      "Token Length--- 6813\n",
      "Processing i 1940\n",
      "Token Length--- 15911\n",
      "Processing i 1941\n",
      "Token Length--- 3141\n",
      "Processing i 1942\n",
      "Token Length--- 4588\n",
      "Processing i 1943\n",
      "Token Length--- 17383\n",
      "Processing i 1944\n",
      "Token Length--- 1942\n",
      "Processing i 1945\n",
      "Token Length--- 5712\n",
      "Processing i 1946\n",
      "Token Length--- 4391\n",
      "Processing i 1947\n",
      "Token Length--- 17798\n",
      "Processing i 1948\n",
      "Token Length--- 6423\n",
      "Processing i 1949\n",
      "Token Length--- 7298\n",
      "Processing i 1950\n",
      "Token Length--- 6057\n",
      "Processing i 1951\n",
      "Token Length--- 6672\n",
      "Processing i 1952\n",
      "Token Length--- 22422\n",
      "Processing i 1953\n",
      "Token Length--- 1530\n",
      "Processing i 1954\n",
      "Token Length--- 7661\n",
      "Processing i 1955\n",
      "Token Length--- 692\n",
      "Processing i 1956\n",
      "Token Length--- 17172\n",
      "Processing i 1957\n",
      "Token Length--- 11356\n",
      "Processing i 1958\n",
      "Token Length--- 6806\n",
      "Processing i 1959\n",
      "Token Length--- 1304\n",
      "Processing i 1960\n",
      "Token Length--- 6580\n",
      "Processing i 1961\n",
      "Token Length--- 3793\n",
      "Processing i 1962\n",
      "Token Length--- 34706\n",
      "Processing i 1963\n",
      "Token Length--- 5629\n",
      "Processing i 1964\n",
      "Token Length--- 6144\n",
      "Processing i 1965\n",
      "Token Length--- 1477\n",
      "Processing i 1966\n",
      "Token Length--- 6489\n",
      "Processing i 1967\n",
      "Token Length--- 5923\n",
      "Processing i 1968\n",
      "Token Length--- 2700\n",
      "Processing i 1969\n",
      "Token Length--- 4901\n",
      "Processing i 1970\n",
      "Token Length--- 20706\n",
      "Processing i 1971\n",
      "Token Length--- 4687\n",
      "Processing i 1972\n",
      "Token Length--- 5986\n",
      "Processing i 1973\n",
      "Token Length--- 38931\n",
      "Processing i 1974\n",
      "Token Length--- 4506\n",
      "Processing i 1975\n",
      "Token Length--- 6161\n",
      "Processing i 1976\n",
      "Token Length--- 8963\n",
      "Processing i 1977\n",
      "Token Length--- 5461\n",
      "Processing i 1978\n",
      "Token Length--- 3091\n",
      "Processing i 1979\n",
      "Token Length--- 2769\n",
      "Processing i 1980\n",
      "Token Length--- 9339\n",
      "Processing i 1981\n",
      "Token Length--- 9135\n",
      "Processing i 1982\n",
      "Token Length--- 6017\n",
      "Processing i 1983\n",
      "Token Length--- 6175\n",
      "Processing i 1984\n",
      "Token Length--- 4451\n",
      "Processing i 1985\n",
      "Token Length--- 18122\n",
      "Processing i 1986\n",
      "Token Length--- 3714\n",
      "Processing i 1987\n",
      "Token Length--- 21656\n",
      "Processing i 1988\n",
      "Token Length--- 2503\n",
      "Processing i 1989\n",
      "Token Length--- 5675\n",
      "Processing i 1990\n",
      "Token Length--- 3466\n",
      "Processing i 1991\n",
      "Token Length--- 1033\n",
      "Processing i 1992\n",
      "Token Length--- 8131\n",
      "Processing i 1993\n",
      "Token Length--- 29595\n",
      "Processing i 1994\n",
      "Token Length--- 10283\n",
      "Processing i 1995\n",
      "Token Length--- 7118\n",
      "Processing i 1996\n",
      "Token Length--- 4020\n",
      "Processing i 1997\n",
      "Token Length--- 12185\n",
      "Processing i 1998\n",
      "Token Length--- 15169\n",
      "Processing i 1999\n",
      "Token Length--- 3517\n",
      "Processing i 2000\n",
      "Token Length--- 8086\n",
      "Processing i 2001\n",
      "Token Length--- 3172\n",
      "Processing i 2002\n",
      "Token Length--- 30525\n",
      "Processing i 2003\n",
      "Token Length--- 17459\n",
      "Processing i 2004\n",
      "Token Length--- 4235\n",
      "Processing i 2005\n",
      "Token Length--- 22035\n",
      "Processing i 2006\n",
      "Token Length--- 16200\n",
      "Processing i 2007\n",
      "Token Length--- 3919\n",
      "Processing i 2008\n",
      "Token Length--- 9516\n",
      "Processing i 2009\n",
      "Token Length--- 14622\n",
      "Processing i 2010\n",
      "Token Length--- 2956\n",
      "Processing i 2011\n",
      "Token Length--- 12131\n",
      "Processing i 2012\n",
      "Token Length--- 1843\n",
      "Processing i 2013\n",
      "Token Length--- 9601\n",
      "Processing i 2014\n",
      "Token Length--- 2189\n",
      "Processing i 2015\n",
      "Token Length--- 3084\n",
      "Processing i 2016\n",
      "Token Length--- 3373\n",
      "Processing i 2017\n",
      "Token Length--- 13781\n",
      "Processing i 2018\n",
      "Token Length--- 8185\n",
      "Processing i 2019\n",
      "Token Length--- 1715\n",
      "Processing i 2020\n",
      "Token Length--- 1811\n",
      "Processing i 2021\n",
      "Token Length--- 10330\n",
      "Processing i 2022\n",
      "Token Length--- 7111\n",
      "Processing i 2023\n",
      "Token Length--- 1525\n",
      "Processing i 2024\n",
      "Token Length--- 4116\n",
      "Processing i 2025\n",
      "Token Length--- 24498\n",
      "Processing i 2026\n",
      "Token Length--- 7351\n",
      "Processing i 2027\n",
      "Token Length--- 12847\n",
      "Processing i 2028\n",
      "Token Length--- 2291\n",
      "Processing i 2029\n",
      "Token Length--- 2859\n",
      "Processing i 2030\n",
      "Token Length--- 918\n",
      "Processing i 2031\n",
      "Token Length--- 1941\n",
      "Processing i 2032\n",
      "Token Length--- 5259\n",
      "Processing i 2033\n",
      "Token Length--- 16641\n",
      "Processing i 2034\n",
      "Token Length--- 2173\n",
      "Processing i 2035\n",
      "Token Length--- 3512\n",
      "Processing i 2036\n",
      "Token Length--- 3042\n",
      "Processing i 2037\n",
      "Token Length--- 7819\n",
      "Processing i 2038\n",
      "Token Length--- 11112\n",
      "Processing i 2039\n",
      "Token Length--- 4215\n",
      "Processing i 2040\n",
      "Token Length--- 15123\n",
      "Processing i 2041\n",
      "Token Length--- 25756\n",
      "Processing i 2042\n",
      "Token Length--- 1175\n",
      "Processing i 2043\n",
      "Token Length--- 14300\n",
      "Processing i 2044\n",
      "Token Length--- 4534\n",
      "Processing i 2045\n",
      "Token Length--- 3698\n",
      "Processing i 2046\n",
      "Token Length--- 14232\n",
      "Processing i 2047\n",
      "Token Length--- 8257\n",
      "Processing i 2048\n",
      "Token Length--- 7500\n",
      "Processing i 2049\n",
      "Token Length--- 11024\n",
      "Processing i 2050\n",
      "Token Length--- 1015\n",
      "Processing i 2051\n",
      "Token Length--- 9685\n",
      "Processing i 2052\n",
      "Token Length--- 4571\n",
      "Processing i 2053\n",
      "Token Length--- 21759\n",
      "Processing i 2054\n",
      "Token Length--- 2525\n",
      "Processing i 2055\n",
      "Token Length--- 11051\n",
      "Processing i 2056\n",
      "Token Length--- 14373\n",
      "Processing i 2057\n",
      "Token Length--- 25625\n",
      "Processing i 2058\n",
      "Token Length--- 12829\n",
      "Processing i 2059\n",
      "Token Length--- 7680\n",
      "Processing i 2060\n",
      "Token Length--- 11221\n",
      "Processing i 2061\n",
      "Token Length--- 16808\n",
      "Processing i 2062\n",
      "Token Length--- 4837\n",
      "Processing i 2063\n",
      "Token Length--- 4506\n",
      "Processing i 2064\n",
      "Token Length--- 22268\n",
      "Processing i 2065\n",
      "Token Length--- 8324\n",
      "Processing i 2066\n",
      "Token Length--- 3114\n",
      "Processing i 2067\n",
      "Token Length--- 7572\n",
      "Processing i 2068\n",
      "Token Length--- 4058\n",
      "Processing i 2069\n",
      "Token Length--- 14950\n",
      "Processing i 2070\n",
      "Token Length--- 36145\n",
      "Processing i 2071\n",
      "Token Length--- 3668\n",
      "Processing i 2072\n",
      "Token Length--- 18771\n",
      "Processing i 2073\n",
      "Token Length--- 6237\n",
      "Processing i 2074\n",
      "Token Length--- 12093\n",
      "Processing i 2075\n",
      "Token Length--- 7949\n",
      "Processing i 2076\n",
      "Token Length--- 7867\n",
      "Processing i 2077\n",
      "Token Length--- 3213\n",
      "Processing i 2078\n",
      "Token Length--- 888\n",
      "Processing i 2079\n",
      "Token Length--- 990\n",
      "Processing i 2080\n",
      "Token Length--- 3529\n",
      "Processing i 2081\n",
      "Token Length--- 9644\n",
      "Processing i 2082\n",
      "Token Length--- 14024\n",
      "Processing i 2083\n",
      "Token Length--- 3104\n",
      "Processing i 2084\n",
      "Token Length--- 3586\n",
      "Processing i 2085\n",
      "Token Length--- 12145\n",
      "Processing i 2086\n",
      "Token Length--- 1687\n",
      "Processing i 2087\n",
      "Token Length--- 3136\n",
      "Processing i 2088\n",
      "Token Length--- 3840\n",
      "Processing i 2089\n",
      "Token Length--- 10908\n",
      "Processing i 2090\n",
      "Token Length--- 1221\n",
      "Processing i 2091\n",
      "Token Length--- 9407\n",
      "Processing i 2092\n",
      "Token Length--- 5937\n",
      "Processing i 2093\n",
      "Token Length--- 3596\n",
      "Processing i 2094\n",
      "Token Length--- 4010\n",
      "Processing i 2095\n",
      "Token Length--- 8144\n",
      "Processing i 2096\n",
      "Token Length--- 1442\n",
      "Processing i 2097\n",
      "Token Length--- 8871\n",
      "Processing i 2098\n",
      "Token Length--- 11275\n",
      "Processing i 2099\n",
      "Token Length--- 1893\n",
      "Processing i 2100\n",
      "Token Length--- 5463\n",
      "Processing i 2101\n",
      "Token Length--- 4320\n",
      "Processing i 2102\n",
      "Token Length--- 32059\n",
      "Processing i 2103\n",
      "Token Length--- 5493\n",
      "Processing i 2104\n",
      "Token Length--- 6629\n",
      "Processing i 2105\n",
      "Token Length--- 5499\n",
      "Processing i 2106\n",
      "Token Length--- 11778\n",
      "Processing i 2107\n",
      "Token Length--- 7616\n",
      "Processing i 2108\n",
      "Token Length--- 4795\n",
      "Processing i 2109\n",
      "Token Length--- 2564\n",
      "Processing i 2110\n",
      "Token Length--- 6827\n",
      "Processing i 2111\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 34828\n",
      "Processing i 2112\n",
      "Token Length--- 34334\n",
      "Processing i 2113\n",
      "Token Length--- 6381\n",
      "Processing i 2114\n",
      "Token Length--- 3595\n",
      "Processing i 2115\n",
      "Token Length--- 3596\n",
      "Processing i 2116\n",
      "Token Length--- 17386\n",
      "Processing i 2117\n",
      "Token Length--- 8569\n",
      "Processing i 2118\n",
      "Token Length--- 4627\n",
      "Processing i 2119\n",
      "Token Length--- 1094\n",
      "Processing i 2120\n",
      "Token Length--- 3268\n",
      "Processing i 2121\n",
      "Token Length--- 21793\n",
      "Processing i 2122\n",
      "Token Length--- 8600\n",
      "Processing i 2123\n",
      "Token Length--- 18417\n",
      "Processing i 2124\n",
      "Token Length--- 2362\n",
      "Processing i 2125\n",
      "Token Length--- 22438\n",
      "Processing i 2126\n",
      "Token Length--- 21887\n",
      "Processing i 2127\n",
      "Token Length--- 6071\n",
      "Processing i 2128\n",
      "Token Length--- 10330\n",
      "Processing i 2129\n",
      "Token Length--- 10148\n",
      "Processing i 2130\n",
      "Token Length--- 2687\n",
      "Processing i 2131\n",
      "Token Length--- 10501\n",
      "Processing i 2132\n",
      "Token Length--- 8663\n",
      "Processing i 2133\n",
      "Token Length--- 1519\n",
      "Processing i 2134\n",
      "Token Length--- 4894\n",
      "Processing i 2135\n",
      "Token Length--- 2126\n",
      "Processing i 2136\n",
      "Token Length--- 2587\n",
      "Processing i 2137\n",
      "Token Length--- 22869\n",
      "Processing i 2138\n",
      "Token Length--- 24207\n",
      "Processing i 2139\n",
      "Token Length--- 31533\n",
      "Processing i 2140\n",
      "Token Length--- 416\n",
      "Processing i 2141\n",
      "Token Length--- 7147\n",
      "Processing i 2142\n",
      "Token Length--- 40326\n",
      "Processing i 2143\n",
      "Token Length--- 11488\n",
      "Processing i 2144\n",
      "Token Length--- 8605\n",
      "Processing i 2145\n",
      "Token Length--- 19933\n",
      "Processing i 2146\n",
      "Token Length--- 5477\n",
      "Processing i 2147\n",
      "Token Length--- 1174\n",
      "Processing i 2148\n",
      "Token Length--- 2588\n",
      "Processing i 2149\n",
      "Token Length--- 21459\n",
      "Processing i 2150\n",
      "Token Length--- 27815\n",
      "Processing i 2151\n",
      "Token Length--- 4400\n",
      "Processing i 2152\n",
      "Token Length--- 22256\n",
      "Processing i 2153\n",
      "Token Length--- 44312\n",
      "Processing i 2154\n",
      "Token Length--- 3281\n",
      "Processing i 2155\n",
      "Token Length--- 7953\n",
      "Processing i 2156\n",
      "Token Length--- 2619\n",
      "Processing i 2157\n",
      "Token Length--- 3924\n",
      "Processing i 2158\n",
      "Token Length--- 10573\n",
      "Processing i 2159\n",
      "Token Length--- 3084\n",
      "Processing i 2160\n",
      "Token Length--- 5831\n",
      "Processing i 2161\n",
      "Token Length--- 2060\n",
      "Processing i 2162\n",
      "Token Length--- 14603\n",
      "Processing i 2163\n",
      "Token Length--- 9845\n",
      "Processing i 2164\n",
      "Token Length--- 19266\n",
      "Processing i 2165\n",
      "Token Length--- 15712\n",
      "Processing i 2166\n",
      "Token Length--- 1508\n",
      "Processing i 2167\n",
      "Token Length--- 4169\n",
      "Processing i 2168\n",
      "Token Length--- 2473\n",
      "Processing i 2169\n",
      "Token Length--- 16934\n",
      "Processing i 2170\n",
      "Token Length--- 4844\n",
      "Processing i 2171\n",
      "Token Length--- 6598\n",
      "Processing i 2172\n",
      "Token Length--- 13150\n",
      "Processing i 2173\n",
      "Token Length--- 6508\n",
      "Processing i 2174\n",
      "Token Length--- 5571\n",
      "Processing i 2175\n",
      "Token Length--- 9435\n",
      "Processing i 2176\n",
      "Token Length--- 2062\n",
      "Processing i 2177\n",
      "Token Length--- 16923\n",
      "Processing i 2178\n",
      "Token Length--- 8168\n",
      "Processing i 2179\n",
      "Token Length--- 2707\n",
      "Processing i 2180\n",
      "Token Length--- 4436\n",
      "Processing i 2181\n",
      "Token Length--- 17064\n",
      "Processing i 2182\n",
      "Token Length--- 14837\n",
      "Processing i 2183\n",
      "Token Length--- 14668\n",
      "Processing i 2184\n",
      "Token Length--- 10913\n",
      "Processing i 2185\n",
      "Token Length--- 3267\n",
      "Processing i 2186\n",
      "Token Length--- 17385\n",
      "Processing i 2187\n",
      "Token Length--- 36136\n",
      "Processing i 2188\n",
      "Token Length--- 15774\n",
      "Processing i 2189\n",
      "Token Length--- 21424\n",
      "Processing i 2190\n",
      "Token Length--- 1152\n",
      "Processing i 2191\n",
      "Token Length--- 996\n",
      "Processing i 2192\n",
      "Token Length--- 26670\n",
      "Processing i 2193\n",
      "Token Length--- 4835\n",
      "Processing i 2194\n",
      "Token Length--- 14033\n",
      "Processing i 2195\n",
      "Token Length--- 4734\n",
      "Processing i 2196\n",
      "Token Length--- 5881\n",
      "Processing i 2197\n",
      "Token Length--- 5264\n",
      "Processing i 2198\n",
      "Token Length--- 4338\n",
      "Processing i 2199\n",
      "Token Length--- 2640\n",
      "Processing i 2200\n",
      "Token Length--- 2505\n",
      "Processing i 2201\n",
      "Token Length--- 11220\n",
      "Processing i 2202\n",
      "Token Length--- 7152\n",
      "Processing i 2203\n",
      "Token Length--- 9042\n",
      "Processing i 2204\n",
      "Token Length--- 1236\n",
      "Processing i 2205\n",
      "Token Length--- 24653\n",
      "Processing i 2206\n",
      "Token Length--- 5807\n",
      "Processing i 2207\n",
      "Token Length--- 23488\n",
      "Processing i 2208\n",
      "Token Length--- 10892\n",
      "Processing i 2209\n",
      "Token Length--- 2758\n",
      "Processing i 2210\n",
      "Token Length--- 5088\n",
      "Processing i 2211\n",
      "Token Length--- 18032\n",
      "Processing i 2212\n",
      "Token Length--- 21350\n",
      "Processing i 2213\n",
      "Token Length--- 15403\n",
      "Processing i 2214\n",
      "Token Length--- 7644\n",
      "Processing i 2215\n",
      "Token Length--- 12670\n",
      "Processing i 2216\n",
      "Token Length--- 13275\n",
      "Processing i 2217\n",
      "Token Length--- 8737\n",
      "Processing i 2218\n",
      "Token Length--- 7093\n",
      "Processing i 2219\n",
      "Token Length--- 2076\n",
      "Processing i 2220\n",
      "Token Length--- 16003\n",
      "Processing i 2221\n",
      "Token Length--- 5046\n",
      "Processing i 2222\n",
      "Token Length--- 12214\n",
      "Processing i 2223\n",
      "Token Length--- 13926\n",
      "Processing i 2224\n",
      "Token Length--- 18261\n",
      "Processing i 2225\n",
      "Token Length--- 56928\n",
      "Processing i 2226\n",
      "Token Length--- 10658\n",
      "Processing i 2227\n",
      "Token Length--- 16773\n",
      "Processing i 2228\n",
      "Token Length--- 38800\n",
      "Processing i 2229\n",
      "Token Length--- 22499\n",
      "Processing i 2230\n",
      "Token Length--- 14302\n",
      "Processing i 2231\n",
      "Token Length--- 23386\n",
      "Processing i 2232\n",
      "Token Length--- 17624\n",
      "Processing i 2233\n",
      "Token Length--- 4376\n",
      "Processing i 2234\n",
      "Token Length--- 6809\n",
      "Processing i 2235\n",
      "Token Length--- 16374\n",
      "Processing i 2236\n",
      "Token Length--- 3019\n",
      "Processing i 2237\n",
      "Token Length--- 12603\n",
      "Processing i 2238\n",
      "Token Length--- 4434\n",
      "Processing i 2239\n",
      "Token Length--- 10137\n",
      "Processing i 2240\n",
      "Token Length--- 3398\n",
      "Processing i 2241\n",
      "Token Length--- 13171\n",
      "Processing i 2242\n",
      "Token Length--- 3996\n",
      "Processing i 2243\n",
      "Token Length--- 10717\n",
      "Processing i 2244\n",
      "Token Length--- 18192\n",
      "Processing i 2245\n",
      "Token Length--- 14921\n",
      "Processing i 2246\n",
      "Token Length--- 10063\n",
      "Processing i 2247\n",
      "Token Length--- 22407\n",
      "Processing i 2248\n",
      "Token Length--- 5500\n",
      "Processing i 2249\n",
      "Token Length--- 2163\n",
      "Processing i 2250\n",
      "Token Length--- 22355\n",
      "Processing i 2251\n",
      "Token Length--- 9914\n",
      "Processing i 2252\n",
      "Token Length--- 9757\n",
      "Processing i 2253\n",
      "Token Length--- 6712\n",
      "Processing i 2254\n",
      "Token Length--- 3711\n",
      "Processing i 2255\n",
      "Token Length--- 6693\n",
      "Processing i 2256\n",
      "Token Length--- 5802\n",
      "Processing i 2257\n",
      "Token Length--- 6938\n",
      "Processing i 2258\n",
      "Token Length--- 11215\n",
      "Processing i 2259\n",
      "Token Length--- 3732\n",
      "Processing i 2260\n",
      "Token Length--- 5347\n",
      "Processing i 2261\n",
      "Token Length--- 915\n",
      "Processing i 2262\n",
      "Token Length--- 3817\n",
      "Processing i 2263\n",
      "Token Length--- 37647\n",
      "Processing i 2264\n",
      "Token Length--- 2896\n",
      "Processing i 2265\n",
      "Token Length--- 739\n",
      "Processing i 2266\n",
      "Token Length--- 2456\n",
      "Processing i 2267\n",
      "Token Length--- 17921\n",
      "Processing i 2268\n",
      "Token Length--- 5235\n",
      "Processing i 2269\n",
      "Token Length--- 5148\n",
      "Processing i 2270\n",
      "Token Length--- 3315\n",
      "Processing i 2271\n",
      "Token Length--- 3036\n",
      "Processing i 2272\n",
      "Token Length--- 7102\n",
      "Processing i 2273\n",
      "Token Length--- 5767\n",
      "Processing i 2274\n",
      "Token Length--- 5007\n",
      "Processing i 2275\n",
      "Token Length--- 10670\n",
      "Processing i 2276\n",
      "Token Length--- 19308\n",
      "Processing i 2277\n",
      "Token Length--- 18777\n",
      "Processing i 2278\n",
      "Token Length--- 13818\n",
      "Processing i 2279\n",
      "Token Length--- 30449\n",
      "Processing i 2280\n",
      "Token Length--- 6360\n",
      "Processing i 2281\n",
      "Token Length--- 7700\n",
      "Processing i 2282\n",
      "Token Length--- 11420\n",
      "Processing i 2283\n",
      "Token Length--- 17372\n",
      "Processing i 2284\n",
      "Token Length--- 10411\n",
      "Processing i 2285\n",
      "Token Length--- 16838\n",
      "Processing i 2286\n",
      "Token Length--- 16784\n",
      "Processing i 2287\n",
      "Token Length--- 4629\n",
      "Processing i 2288\n",
      "Token Length--- 1179\n",
      "Processing i 2289\n",
      "Token Length--- 1814\n",
      "Processing i 2290\n",
      "Token Length--- 22131\n",
      "Processing i 2291\n",
      "Token Length--- 13297\n",
      "Processing i 2292\n",
      "Token Length--- 2864\n",
      "Processing i 2293\n",
      "Token Length--- 19043\n",
      "Processing i 2294\n",
      "Token Length--- 1541\n",
      "Processing i 2295\n",
      "Token Length--- 3053\n",
      "Processing i 2296\n",
      "Token Length--- 4865\n",
      "Processing i 2297\n",
      "Token Length--- 3002\n",
      "Processing i 2298\n",
      "Token Length--- 4533\n",
      "Processing i 2299\n",
      "Token Length--- 15673\n",
      "Processing i 2300\n",
      "Token Length--- 3280\n",
      "Processing i 2301\n",
      "Token Length--- 13440\n",
      "Processing i 2302\n",
      "Token Length--- 2324\n",
      "Processing i 2303\n",
      "Token Length--- 1921\n",
      "Processing i 2304\n",
      "Token Length--- 3407\n",
      "Processing i 2305\n",
      "Token Length--- 9203\n",
      "Processing i 2306\n",
      "Token Length--- 7545\n",
      "Processing i 2307\n",
      "Token Length--- 2776\n",
      "Processing i 2308\n",
      "Token Length--- 4192\n",
      "Processing i 2309\n",
      "Token Length--- 8452\n",
      "Processing i 2310\n",
      "Token Length--- 4847\n",
      "Processing i 2311\n",
      "Token Length--- 5250\n",
      "Processing i 2312\n",
      "Token Length--- 7265\n",
      "Processing i 2313\n",
      "Token Length--- 12015\n",
      "Processing i 2314\n",
      "Token Length--- 8493\n",
      "Processing i 2315\n",
      "Token Length--- 1863\n",
      "Processing i 2316\n",
      "Token Length--- 5231\n",
      "Processing i 2317\n",
      "Token Length--- 2960\n",
      "Processing i 2318\n",
      "Token Length--- 4911\n",
      "Processing i 2319\n",
      "Token Length--- 1274\n",
      "Processing i 2320\n",
      "Token Length--- 6353\n",
      "Processing i 2321\n",
      "Token Length--- 6348\n",
      "Processing i 2322\n",
      "Token Length--- 5669\n",
      "Processing i 2323\n",
      "Token Length--- 5010\n",
      "Processing i 2324\n",
      "Token Length--- 9106\n",
      "Processing i 2325\n",
      "Token Length--- 2817\n",
      "Processing i 2326\n",
      "Token Length--- 5856\n",
      "Processing i 2327\n",
      "Token Length--- 1404\n",
      "Processing i 2328\n",
      "Token Length--- 3831\n",
      "Processing i 2329\n",
      "Token Length--- 20759\n",
      "Processing i 2330\n",
      "Token Length--- 1448\n",
      "Processing i 2331\n",
      "Token Length--- 28381\n",
      "Processing i 2332\n",
      "Token Length--- 1765\n",
      "Processing i 2333\n",
      "Token Length--- 3912\n",
      "Processing i 2334\n",
      "Token Length--- 2238\n",
      "Processing i 2335\n",
      "Token Length--- 2884\n",
      "Processing i 2336\n",
      "Token Length--- 4983\n",
      "Processing i 2337\n",
      "Token Length--- 6616\n",
      "Processing i 2338\n",
      "Token Length--- 14216\n",
      "Processing i 2339\n",
      "Token Length--- 6314\n",
      "Processing i 2340\n",
      "Token Length--- 1199\n",
      "Processing i 2341\n",
      "Token Length--- 5603\n",
      "Processing i 2342\n",
      "Token Length--- 3839\n",
      "Processing i 2343\n",
      "Token Length--- 9362\n",
      "Processing i 2344\n",
      "Token Length--- 5404\n",
      "Processing i 2345\n",
      "Token Length--- 12890\n",
      "Processing i 2346\n",
      "Token Length--- 3731\n",
      "Processing i 2347\n",
      "Token Length--- 2795\n",
      "Processing i 2348\n",
      "Token Length--- 12328\n",
      "Processing i 2349\n",
      "Token Length--- 2041\n",
      "Processing i 2350\n",
      "Token Length--- 17570\n",
      "Processing i 2351\n",
      "Token Length--- 12215\n",
      "Processing i 2352\n",
      "Token Length--- 10824\n",
      "Processing i 2353\n",
      "Token Length--- 4834\n",
      "Processing i 2354\n",
      "Token Length--- 19598\n",
      "Processing i 2355\n",
      "Token Length--- 3609\n",
      "Processing i 2356\n",
      "Token Length--- 15919\n",
      "Processing i 2357\n",
      "Token Length--- 4498\n",
      "Processing i 2358\n",
      "Token Length--- 24723\n",
      "Processing i 2359\n",
      "Token Length--- 17169\n",
      "Processing i 2360\n",
      "Token Length--- 2085\n",
      "Processing i 2361\n",
      "Token Length--- 5723\n",
      "Processing i 2362\n",
      "Token Length--- 5540\n",
      "Processing i 2363\n",
      "Token Length--- 2430\n",
      "Processing i 2364\n",
      "Token Length--- 10339\n",
      "Processing i 2365\n",
      "Token Length--- 5321\n",
      "Processing i 2366\n",
      "Token Length--- 10721\n",
      "Processing i 2367\n",
      "Token Length--- 7719\n",
      "Processing i 2368\n",
      "Token Length--- 1022\n",
      "Processing i 2369\n",
      "Token Length--- 2287\n",
      "Processing i 2370\n",
      "Token Length--- 5193\n",
      "Processing i 2371\n",
      "Token Length--- 9874\n",
      "Processing i 2372\n",
      "Token Length--- 9786\n",
      "Processing i 2373\n",
      "Token Length--- 8121\n",
      "Processing i 2374\n",
      "Token Length--- 4297\n",
      "Processing i 2375\n",
      "Token Length--- 3316\n",
      "Processing i 2376\n",
      "Token Length--- 21864\n",
      "Processing i 2377\n",
      "Token Length--- 5790\n",
      "Processing i 2378\n",
      "Token Length--- 4090\n",
      "Processing i 2379\n",
      "Token Length--- 5151\n",
      "Processing i 2380\n",
      "Token Length--- 7577\n",
      "Processing i 2381\n",
      "Token Length--- 5164\n",
      "Processing i 2382\n",
      "Token Length--- 9208\n",
      "Processing i 2383\n",
      "Token Length--- 1742\n",
      "Processing i 2384\n",
      "Token Length--- 4421\n",
      "Processing i 2385\n",
      "Token Length--- 4584\n",
      "Processing i 2386\n",
      "Token Length--- 1621\n",
      "Processing i 2387\n",
      "Token Length--- 25868\n",
      "Processing i 2388\n",
      "Token Length--- 17384\n",
      "Processing i 2389\n",
      "Token Length--- 4993\n",
      "Processing i 2390\n",
      "Token Length--- 4815\n",
      "Processing i 2391\n",
      "Token Length--- 2904\n",
      "Processing i 2392\n",
      "Token Length--- 19145\n",
      "Processing i 2393\n",
      "Token Length--- 3635\n",
      "Processing i 2394\n",
      "Token Length--- 31811\n",
      "Processing i 2395\n",
      "Token Length--- 4936\n",
      "Processing i 2396\n",
      "Token Length--- 6594\n",
      "Processing i 2397\n",
      "Token Length--- 14518\n",
      "Processing i 2398\n",
      "Token Length--- 10933\n",
      "Processing i 2399\n",
      "Token Length--- 7639\n",
      "Processing i 2400\n",
      "Token Length--- 11211\n",
      "Processing i 2401\n",
      "Token Length--- 12059\n",
      "Processing i 2402\n",
      "Token Length--- 2603\n",
      "Processing i 2403\n",
      "Token Length--- 4109\n",
      "Processing i 2404\n",
      "Token Length--- 7884\n",
      "Processing i 2405\n",
      "Token Length--- 19162\n",
      "Processing i 2406\n",
      "Token Length--- 16440\n",
      "Processing i 2407\n",
      "Token Length--- 14423\n",
      "Processing i 2408\n",
      "Token Length--- 1182\n",
      "Processing i 2409\n",
      "Token Length--- 2740\n",
      "Processing i 2410\n",
      "Token Length--- 7068\n",
      "Processing i 2411\n",
      "Token Length--- 31002\n",
      "Processing i 2412\n",
      "Token Length--- 6272\n",
      "Processing i 2413\n",
      "Token Length--- 1707\n",
      "Processing i 2414\n",
      "Token Length--- 14903\n",
      "Processing i 2415\n",
      "Token Length--- 6477\n",
      "Processing i 2416\n",
      "Token Length--- 7911\n",
      "Processing i 2417\n",
      "Token Length--- 7339\n",
      "Processing i 2418\n",
      "Token Length--- 28879\n",
      "Processing i 2419\n",
      "Token Length--- 3866\n",
      "Processing i 2420\n",
      "Token Length--- 4209\n",
      "Processing i 2421\n",
      "Token Length--- 2959\n",
      "Processing i 2422\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 3965\n",
      "Processing i 2423\n",
      "Token Length--- 14287\n",
      "Processing i 2424\n",
      "Token Length--- 715\n",
      "Processing i 2425\n",
      "Token Length--- 7429\n",
      "Processing i 2426\n",
      "Token Length--- 1941\n",
      "Processing i 2427\n",
      "Token Length--- 4188\n",
      "Processing i 2428\n",
      "Token Length--- 9848\n",
      "Processing i 2429\n",
      "Token Length--- 4884\n",
      "Processing i 2430\n",
      "Token Length--- 15817\n",
      "Processing i 2431\n",
      "Token Length--- 37725\n",
      "Processing i 2432\n",
      "Token Length--- 1302\n",
      "Processing i 2433\n",
      "Token Length--- 9719\n",
      "Processing i 2434\n",
      "Token Length--- 449\n",
      "Processing i 2435\n",
      "Token Length--- 6791\n",
      "Processing i 2436\n",
      "Token Length--- 8805\n",
      "Processing i 2437\n",
      "Token Length--- 3973\n",
      "Processing i 2438\n",
      "Token Length--- 19979\n",
      "Processing i 2439\n",
      "Token Length--- 6083\n",
      "Processing i 2440\n",
      "Token Length--- 3742\n",
      "Processing i 2441\n",
      "Token Length--- 2104\n",
      "Processing i 2442\n",
      "Token Length--- 12807\n",
      "Processing i 2443\n",
      "Token Length--- 16666\n",
      "Processing i 2444\n",
      "Token Length--- 9678\n",
      "Processing i 2445\n",
      "Token Length--- 6271\n",
      "Processing i 2446\n",
      "Token Length--- 4555\n",
      "Processing i 2447\n",
      "Token Length--- 9507\n",
      "Processing i 2448\n",
      "Token Length--- 7011\n",
      "Processing i 2449\n",
      "Token Length--- 4542\n",
      "Processing i 2450\n",
      "Token Length--- 3037\n",
      "Processing i 2451\n",
      "Token Length--- 2729\n",
      "Processing i 2452\n",
      "Token Length--- 16299\n",
      "Processing i 2453\n",
      "Token Length--- 10157\n",
      "Processing i 2454\n",
      "Token Length--- 8648\n",
      "Processing i 2455\n",
      "Token Length--- 5605\n",
      "Processing i 2456\n",
      "Token Length--- 19668\n",
      "Processing i 2457\n",
      "Token Length--- 15848\n",
      "Processing i 2458\n",
      "Token Length--- 8354\n",
      "Processing i 2459\n",
      "Token Length--- 2111\n",
      "Processing i 2460\n",
      "Token Length--- 13761\n",
      "Processing i 2461\n",
      "Token Length--- 3104\n",
      "Processing i 2462\n",
      "Token Length--- 21721\n",
      "Processing i 2463\n",
      "Token Length--- 5706\n",
      "Processing i 2464\n",
      "Token Length--- 11423\n",
      "Processing i 2465\n",
      "Token Length--- 3176\n",
      "Processing i 2466\n",
      "Token Length--- 20117\n",
      "Processing i 2467\n",
      "Token Length--- 4371\n",
      "Processing i 2468\n",
      "Token Length--- 30802\n",
      "Processing i 2469\n",
      "Token Length--- 3058\n",
      "Processing i 2470\n",
      "Token Length--- 15679\n",
      "Processing i 2471\n",
      "Token Length--- 15025\n",
      "Processing i 2472\n",
      "Token Length--- 17271\n",
      "Processing i 2473\n",
      "Token Length--- 49982\n",
      "Processing i 2474\n",
      "Token Length--- 3471\n",
      "Processing i 2475\n",
      "Token Length--- 2209\n",
      "Processing i 2476\n",
      "Token Length--- 37961\n",
      "Processing i 2477\n",
      "Token Length--- 16962\n",
      "Processing i 2478\n",
      "Token Length--- 8146\n",
      "Processing i 2479\n",
      "Token Length--- 1470\n",
      "Processing i 2480\n",
      "Token Length--- 21641\n",
      "Processing i 2481\n",
      "Token Length--- 5429\n",
      "Processing i 2482\n",
      "Token Length--- 8222\n",
      "Processing i 2483\n",
      "Token Length--- 3798\n",
      "Processing i 2484\n",
      "Token Length--- 21307\n",
      "Processing i 2485\n",
      "Token Length--- 5110\n",
      "Processing i 2486\n",
      "Token Length--- 11645\n",
      "Processing i 2487\n",
      "Token Length--- 6357\n",
      "Processing i 2488\n",
      "Token Length--- 6114\n",
      "Processing i 2489\n",
      "Token Length--- 10057\n",
      "Processing i 2490\n",
      "Token Length--- 2681\n",
      "Processing i 2491\n",
      "Token Length--- 9086\n",
      "Processing i 2492\n",
      "Token Length--- 2478\n",
      "Processing i 2493\n",
      "Token Length--- 4343\n",
      "Processing i 2494\n",
      "Token Length--- 2235\n",
      "Processing i 2495\n",
      "Token Length--- 23891\n",
      "Processing i 2496\n",
      "Token Length--- 5970\n",
      "Processing i 2497\n",
      "Token Length--- 582\n",
      "Processing i 2498\n",
      "Token Length--- 4820\n",
      "Processing i 2499\n",
      "Token Length--- 1418\n",
      "Processing i 2500\n",
      "Token Length--- 5022\n",
      "Processing i 2501\n",
      "Token Length--- 28001\n",
      "Processing i 2502\n",
      "Token Length--- 18021\n",
      "Processing i 2503\n",
      "Token Length--- 29261\n",
      "Processing i 2504\n",
      "Token Length--- 6628\n",
      "Processing i 2505\n",
      "Token Length--- 13258\n",
      "Processing i 2506\n",
      "Token Length--- 26110\n",
      "Processing i 2507\n",
      "Token Length--- 4894\n",
      "Processing i 2508\n",
      "Token Length--- 4128\n",
      "Processing i 2509\n",
      "Token Length--- 8294\n",
      "Processing i 2510\n",
      "Token Length--- 6069\n",
      "Processing i 2511\n",
      "Token Length--- 6633\n",
      "Processing i 2512\n",
      "Token Length--- 6100\n",
      "Processing i 2513\n",
      "Token Length--- 6482\n",
      "Processing i 2514\n",
      "Token Length--- 2336\n",
      "Processing i 2515\n",
      "Token Length--- 4055\n",
      "Processing i 2516\n",
      "Token Length--- 13302\n",
      "Processing i 2517\n",
      "Token Length--- 14833\n",
      "Processing i 2518\n",
      "Token Length--- 4530\n",
      "Processing i 2519\n",
      "Token Length--- 5612\n",
      "Processing i 2520\n",
      "Token Length--- 4322\n",
      "Processing i 2521\n",
      "Token Length--- 22387\n",
      "Processing i 2522\n",
      "Token Length--- 3063\n",
      "Processing i 2523\n",
      "Token Length--- 5616\n",
      "Processing i 2524\n",
      "Token Length--- 4084\n",
      "Processing i 2525\n",
      "Token Length--- 7309\n",
      "Processing i 2526\n",
      "Token Length--- 6571\n",
      "Processing i 2527\n",
      "Token Length--- 10133\n",
      "Processing i 2528\n",
      "Token Length--- 9433\n",
      "Processing i 2529\n",
      "Token Length--- 12114\n",
      "Processing i 2530\n",
      "Token Length--- 15725\n",
      "Processing i 2531\n",
      "Token Length--- 8125\n",
      "Processing i 2532\n",
      "Token Length--- 827\n",
      "Processing i 2533\n",
      "Token Length--- 4259\n",
      "Processing i 2534\n",
      "Token Length--- 4616\n",
      "Processing i 2535\n",
      "Token Length--- 2824\n",
      "Processing i 2536\n",
      "Token Length--- 4434\n",
      "Processing i 2537\n",
      "Token Length--- 5735\n",
      "Processing i 2538\n",
      "Token Length--- 8173\n",
      "Processing i 2539\n",
      "Token Length--- 2227\n",
      "Processing i 2540\n",
      "Token Length--- 41683\n",
      "Processing i 2541\n",
      "Token Length--- 5766\n",
      "Processing i 2542\n",
      "Token Length--- 3543\n",
      "Processing i 2543\n",
      "Token Length--- 3851\n",
      "Processing i 2544\n",
      "Token Length--- 8625\n",
      "Processing i 2545\n",
      "Token Length--- 9387\n",
      "Processing i 2546\n",
      "Token Length--- 10128\n",
      "Processing i 2547\n",
      "Token Length--- 2378\n",
      "Processing i 2548\n",
      "Token Length--- 5236\n",
      "Processing i 2549\n",
      "Token Length--- 10133\n",
      "Processing i 2550\n",
      "Token Length--- 3722\n",
      "Processing i 2551\n",
      "Token Length--- 5479\n",
      "Processing i 2552\n",
      "Token Length--- 1893\n",
      "Processing i 2553\n",
      "Token Length--- 17124\n",
      "Processing i 2554\n",
      "Token Length--- 26174\n",
      "Processing i 2555\n",
      "Token Length--- 21254\n",
      "Processing i 2556\n",
      "Token Length--- 14588\n",
      "Processing i 2557\n",
      "Token Length--- 4740\n",
      "Processing i 2558\n",
      "Token Length--- 9496\n",
      "Processing i 2559\n",
      "Token Length--- 7100\n",
      "Processing i 2560\n",
      "Token Length--- 9202\n",
      "Processing i 2561\n",
      "Token Length--- 7667\n",
      "Processing i 2562\n",
      "Token Length--- 9720\n",
      "Processing i 2563\n",
      "Token Length--- 9145\n",
      "Processing i 2564\n",
      "Token Length--- 8716\n",
      "Processing i 2565\n",
      "Token Length--- 8132\n",
      "Processing i 2566\n",
      "Token Length--- 1277\n",
      "Processing i 2567\n",
      "Token Length--- 7328\n",
      "Processing i 2568\n",
      "Token Length--- 15200\n",
      "Processing i 2569\n",
      "Token Length--- 14084\n",
      "Processing i 2570\n",
      "Token Length--- 10239\n",
      "Processing i 2571\n",
      "Token Length--- 1991\n",
      "Processing i 2572\n",
      "Token Length--- 1601\n",
      "Processing i 2573\n",
      "Token Length--- 2796\n",
      "Processing i 2574\n",
      "Token Length--- 3882\n",
      "Processing i 2575\n",
      "Token Length--- 6472\n",
      "Processing i 2576\n",
      "Token Length--- 44069\n",
      "Processing i 2577\n",
      "Token Length--- 9048\n",
      "Processing i 2578\n",
      "Token Length--- 31111\n",
      "Processing i 2579\n",
      "Token Length--- 19050\n",
      "Processing i 2580\n",
      "Token Length--- 7042\n",
      "Processing i 2581\n",
      "Token Length--- 4069\n",
      "Processing i 2582\n",
      "Token Length--- 31684\n",
      "Processing i 2583\n",
      "Token Length--- 1912\n",
      "Processing i 2584\n",
      "Token Length--- 18306\n",
      "Processing i 2585\n",
      "Token Length--- 8909\n",
      "Processing i 2586\n",
      "Token Length--- 3121\n",
      "Processing i 2587\n",
      "Token Length--- 24550\n",
      "Processing i 2588\n",
      "Token Length--- 17199\n",
      "Processing i 2589\n",
      "Token Length--- 13760\n",
      "Processing i 2590\n",
      "Token Length--- 5201\n",
      "Processing i 2591\n",
      "Token Length--- 6013\n",
      "Processing i 2592\n",
      "Token Length--- 813\n",
      "Processing i 2593\n",
      "Token Length--- 17977\n",
      "Processing i 2594\n",
      "Token Length--- 7189\n",
      "Processing i 2595\n",
      "Token Length--- 2689\n",
      "Processing i 2596\n",
      "Token Length--- 2103\n",
      "Processing i 2597\n",
      "Token Length--- 10331\n",
      "Processing i 2598\n",
      "Token Length--- 10569\n",
      "Processing i 2599\n",
      "Token Length--- 4099\n",
      "Processing i 2600\n",
      "Token Length--- 13435\n",
      "Processing i 2601\n",
      "Token Length--- 5310\n",
      "Processing i 2602\n",
      "Token Length--- 6413\n",
      "Processing i 2603\n",
      "Token Length--- 5499\n",
      "Processing i 2604\n",
      "Token Length--- 10960\n",
      "Processing i 2605\n",
      "Token Length--- 7369\n",
      "Processing i 2606\n",
      "Token Length--- 8364\n",
      "Processing i 2607\n",
      "Token Length--- 12733\n",
      "Processing i 2608\n",
      "Token Length--- 12968\n",
      "Processing i 2609\n",
      "Token Length--- 17370\n",
      "Processing i 2610\n",
      "Token Length--- 7147\n",
      "Processing i 2611\n",
      "Token Length--- 874\n",
      "Processing i 2612\n",
      "Token Length--- 2003\n",
      "Processing i 2613\n",
      "Token Length--- 1489\n",
      "Processing i 2614\n",
      "Token Length--- 4528\n",
      "Processing i 2615\n",
      "Token Length--- 2399\n",
      "Processing i 2616\n",
      "Token Length--- 13296\n",
      "Processing i 2617\n",
      "Token Length--- 8044\n",
      "Processing i 2618\n",
      "Token Length--- 10790\n",
      "Processing i 2619\n",
      "Token Length--- 2034\n",
      "Processing i 2620\n",
      "Token Length--- 3669\n",
      "Processing i 2621\n",
      "Token Length--- 8841\n",
      "Processing i 2622\n",
      "Token Length--- 1718\n",
      "Processing i 2623\n",
      "Token Length--- 4075\n",
      "Processing i 2624\n",
      "Token Length--- 12932\n",
      "Processing i 2625\n",
      "Token Length--- 1967\n",
      "Processing i 2626\n",
      "Token Length--- 17806\n",
      "Processing i 2627\n",
      "Token Length--- 13576\n",
      "Processing i 2628\n",
      "Token Length--- 21032\n",
      "Processing i 2629\n",
      "Token Length--- 9448\n",
      "Processing i 2630\n",
      "Token Length--- 13748\n",
      "Processing i 2631\n",
      "Token Length--- 18925\n",
      "Processing i 2632\n",
      "Token Length--- 11694\n",
      "Processing i 2633\n",
      "Token Length--- 8728\n",
      "Processing i 2634\n",
      "Token Length--- 4044\n",
      "Processing i 2635\n",
      "Token Length--- 5354\n",
      "Processing i 2636\n",
      "Token Length--- 7337\n",
      "Processing i 2637\n",
      "Token Length--- 4036\n",
      "Processing i 2638\n",
      "Token Length--- 11478\n",
      "Processing i 2639\n",
      "Token Length--- 2212\n",
      "Processing i 2640\n",
      "Token Length--- 3952\n",
      "Processing i 2641\n",
      "Token Length--- 39044\n",
      "Processing i 2642\n",
      "Token Length--- 11661\n",
      "Processing i 2643\n",
      "Token Length--- 3003\n",
      "Processing i 2644\n",
      "Token Length--- 9038\n",
      "Processing i 2645\n",
      "Token Length--- 12108\n",
      "Processing i 2646\n",
      "Token Length--- 8408\n",
      "Processing i 2647\n",
      "Token Length--- 17898\n",
      "Processing i 2648\n",
      "Token Length--- 13315\n",
      "Processing i 2649\n",
      "Token Length--- 12168\n",
      "Processing i 2650\n",
      "Token Length--- 3594\n",
      "Processing i 2651\n",
      "Token Length--- 8113\n",
      "Processing i 2652\n",
      "Token Length--- 30578\n",
      "Processing i 2653\n",
      "Token Length--- 16770\n",
      "Processing i 2654\n",
      "Token Length--- 3957\n",
      "Processing i 2655\n",
      "Token Length--- 23592\n",
      "Processing i 2656\n",
      "Token Length--- 7061\n",
      "Processing i 2657\n",
      "Token Length--- 11023\n",
      "Processing i 2658\n",
      "Token Length--- 10500\n",
      "Processing i 2659\n",
      "Token Length--- 4613\n",
      "Processing i 2660\n",
      "Token Length--- 3110\n",
      "Processing i 2661\n",
      "Token Length--- 5027\n",
      "Processing i 2662\n",
      "Token Length--- 15569\n",
      "Processing i 2663\n",
      "Token Length--- 25912\n",
      "Processing i 2664\n",
      "Token Length--- 674\n",
      "Processing i 2665\n",
      "Token Length--- 736\n",
      "Processing i 2666\n",
      "Token Length--- 2348\n",
      "Processing i 2667\n",
      "Token Length--- 20913\n",
      "Processing i 2668\n",
      "Token Length--- 10689\n",
      "Processing i 2669\n",
      "Token Length--- 14281\n",
      "Processing i 2670\n",
      "Token Length--- 3305\n",
      "Processing i 2671\n",
      "Token Length--- 6882\n",
      "Processing i 2672\n",
      "Token Length--- 13903\n",
      "Processing i 2673\n",
      "Token Length--- 4702\n",
      "Processing i 2674\n",
      "Token Length--- 20873\n",
      "Processing i 2675\n",
      "Token Length--- 12652\n",
      "Processing i 2676\n",
      "Token Length--- 14982\n",
      "Processing i 2677\n",
      "Token Length--- 2015\n",
      "Processing i 2678\n",
      "Token Length--- 676\n",
      "Processing i 2679\n",
      "Token Length--- 4088\n",
      "Processing i 2680\n",
      "Token Length--- 13300\n",
      "Processing i 2681\n",
      "Token Length--- 13382\n",
      "Processing i 2682\n",
      "Token Length--- 6658\n",
      "Processing i 2683\n",
      "Token Length--- 12706\n",
      "Processing i 2684\n",
      "Token Length--- 7847\n",
      "Processing i 2685\n",
      "Token Length--- 1367\n",
      "Processing i 2686\n",
      "Token Length--- 2067\n",
      "Processing i 2687\n",
      "Token Length--- 4547\n",
      "Processing i 2688\n",
      "Token Length--- 17759\n",
      "Processing i 2689\n",
      "Token Length--- 31519\n",
      "Processing i 2690\n",
      "Token Length--- 6246\n",
      "Processing i 2691\n",
      "Token Length--- 2480\n",
      "Processing i 2692\n",
      "Token Length--- 1036\n",
      "Processing i 2693\n",
      "Token Length--- 5087\n",
      "Processing i 2694\n",
      "Token Length--- 2319\n",
      "Processing i 2695\n",
      "Token Length--- 3543\n",
      "Processing i 2696\n",
      "Token Length--- 1711\n",
      "Processing i 2697\n",
      "Token Length--- 3088\n",
      "Processing i 2698\n",
      "Token Length--- 12348\n",
      "Processing i 2699\n",
      "Token Length--- 1275\n",
      "Processing i 2700\n",
      "Token Length--- 4870\n",
      "Processing i 2701\n",
      "Token Length--- 6351\n",
      "Processing i 2702\n",
      "Token Length--- 1424\n",
      "Processing i 2703\n",
      "Token Length--- 8677\n",
      "Processing i 2704\n",
      "Token Length--- 9181\n",
      "Processing i 2705\n",
      "Token Length--- 3052\n",
      "Processing i 2706\n",
      "Token Length--- 3771\n",
      "Processing i 2707\n",
      "Token Length--- 2260\n",
      "Processing i 2708\n",
      "Token Length--- 3884\n",
      "Processing i 2709\n",
      "Token Length--- 2317\n",
      "Processing i 2710\n",
      "Token Length--- 9970\n",
      "Processing i 2711\n",
      "Token Length--- 4829\n",
      "Processing i 2712\n",
      "Token Length--- 2491\n",
      "Processing i 2713\n",
      "Token Length--- 1219\n",
      "Processing i 2714\n",
      "Token Length--- 38682\n",
      "Processing i 2715\n",
      "Token Length--- 2415\n",
      "Processing i 2716\n",
      "Token Length--- 5616\n",
      "Processing i 2717\n",
      "Token Length--- 10548\n",
      "Processing i 2718\n",
      "Token Length--- 12873\n",
      "Processing i 2719\n",
      "Token Length--- 17518\n",
      "Processing i 2720\n",
      "Token Length--- 3802\n",
      "Processing i 2721\n",
      "Token Length--- 9481\n",
      "Processing i 2722\n",
      "Token Length--- 8275\n",
      "Processing i 2723\n",
      "Token Length--- 28944\n",
      "Processing i 2724\n",
      "Token Length--- 6866\n",
      "Processing i 2725\n",
      "Token Length--- 6211\n",
      "Processing i 2726\n",
      "Token Length--- 9591\n",
      "Processing i 2727\n",
      "Token Length--- 13870\n",
      "Processing i 2728\n",
      "Token Length--- 1587\n",
      "Processing i 2729\n",
      "Token Length--- 14851\n",
      "Processing i 2730\n",
      "Token Length--- 13588\n",
      "Processing i 2731\n",
      "Token Length--- 1617\n",
      "Processing i 2732\n",
      "Token Length--- 35636\n",
      "Processing i 2733\n",
      "Token Length--- 15954\n",
      "Processing i 2734\n",
      "Token Length--- 8782\n",
      "Processing i 2735\n",
      "Token Length--- 27130\n",
      "Processing i 2736\n",
      "Token Length--- 33981\n",
      "Processing i 2737\n",
      "Token Length--- 18566\n",
      "Processing i 2738\n",
      "Token Length--- 3416\n",
      "Processing i 2739\n",
      "Token Length--- 27268\n",
      "Processing i 2740\n",
      "Token Length--- 16996\n",
      "Processing i 2741\n",
      "Token Length--- 3714\n",
      "Processing i 2742\n",
      "Token Length--- 3148\n",
      "Processing i 2743\n",
      "Token Length--- 2543\n",
      "Processing i 2744\n",
      "Token Length--- 10062\n",
      "Processing i 2745\n",
      "Token Length--- 3282\n",
      "Processing i 2746\n",
      "Token Length--- 7196\n",
      "Processing i 2747\n",
      "Token Length--- 15004\n",
      "Processing i 2748\n",
      "Token Length--- 3537\n",
      "Processing i 2749\n",
      "Token Length--- 17658\n",
      "Processing i 2750\n",
      "Token Length--- 14980\n",
      "Processing i 2751\n",
      "Token Length--- 7358\n",
      "Processing i 2752\n",
      "Token Length--- 1960\n",
      "Processing i 2753\n",
      "Token Length--- 3446\n",
      "Processing i 2754\n",
      "Token Length--- 2345\n",
      "Processing i 2755\n",
      "Token Length--- 1016\n",
      "Processing i 2756\n",
      "Token Length--- 840\n",
      "Processing i 2757\n",
      "Token Length--- 5427\n",
      "Processing i 2758\n",
      "Token Length--- 14594\n",
      "Processing i 2759\n",
      "Token Length--- 7789\n",
      "Processing i 2760\n",
      "Token Length--- 6316\n",
      "Processing i 2761\n",
      "Token Length--- 31138\n",
      "Processing i 2762\n",
      "Token Length--- 13556\n",
      "Processing i 2763\n",
      "Token Length--- 10153\n",
      "Processing i 2764\n",
      "Token Length--- 3342\n",
      "Processing i 2765\n",
      "Token Length--- 1765\n",
      "Processing i 2766\n",
      "Token Length--- 1351\n",
      "Processing i 2767\n",
      "Token Length--- 2159\n",
      "Processing i 2768\n",
      "Token Length--- 13441\n",
      "Processing i 2769\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 19538\n",
      "Processing i 2770\n",
      "Token Length--- 4526\n",
      "Processing i 2771\n",
      "Token Length--- 2142\n",
      "Processing i 2772\n",
      "Token Length--- 8182\n",
      "Processing i 2773\n",
      "Token Length--- 4013\n",
      "Processing i 2774\n",
      "Token Length--- 2406\n",
      "Processing i 2775\n",
      "Token Length--- 22158\n",
      "Processing i 2776\n",
      "Token Length--- 4679\n",
      "Processing i 2777\n",
      "Token Length--- 7242\n",
      "Processing i 2778\n",
      "Token Length--- 4816\n",
      "Processing i 2779\n",
      "Token Length--- 8203\n",
      "Processing i 2780\n",
      "Token Length--- 4799\n",
      "Processing i 2781\n",
      "Token Length--- 438\n",
      "Processing i 2782\n",
      "Token Length--- 18279\n",
      "Processing i 2783\n",
      "Token Length--- 966\n",
      "Processing i 2784\n",
      "Token Length--- 1458\n",
      "Processing i 2785\n",
      "Token Length--- 1359\n",
      "Processing i 2786\n",
      "Token Length--- 6031\n",
      "Processing i 2787\n",
      "Token Length--- 1801\n",
      "Processing i 2788\n",
      "Token Length--- 3648\n",
      "Processing i 2789\n",
      "Token Length--- 3778\n",
      "Processing i 2790\n",
      "Token Length--- 2695\n",
      "Processing i 2791\n",
      "Token Length--- 40813\n",
      "Processing i 2792\n",
      "Token Length--- 10154\n",
      "Processing i 2793\n",
      "Token Length--- 28430\n",
      "Processing i 2794\n",
      "Token Length--- 11220\n",
      "Processing i 2795\n",
      "Token Length--- 17865\n",
      "Processing i 2796\n",
      "Token Length--- 12402\n",
      "Processing i 2797\n",
      "Token Length--- 4665\n",
      "Processing i 2798\n",
      "Token Length--- 1254\n",
      "Processing i 2799\n",
      "Token Length--- 4611\n",
      "Processing i 2800\n",
      "Token Length--- 30473\n",
      "Processing i 2801\n",
      "Token Length--- 1178\n",
      "Processing i 2802\n",
      "Token Length--- 9549\n",
      "Processing i 2803\n",
      "Token Length--- 13081\n",
      "Processing i 2804\n",
      "Token Length--- 11846\n",
      "Processing i 2805\n",
      "Token Length--- 8450\n",
      "Processing i 2806\n",
      "Token Length--- 23536\n",
      "Processing i 2807\n",
      "Token Length--- 7994\n",
      "Processing i 2808\n",
      "Token Length--- 6288\n",
      "Processing i 2809\n",
      "Token Length--- 7292\n",
      "Processing i 2810\n",
      "Token Length--- 4466\n",
      "Processing i 2811\n",
      "Token Length--- 5486\n",
      "Processing i 2812\n",
      "Token Length--- 1049\n",
      "Processing i 2813\n",
      "Token Length--- 25127\n",
      "Processing i 2814\n",
      "Token Length--- 13752\n",
      "Processing i 2815\n",
      "Token Length--- 19642\n",
      "Processing i 2816\n",
      "Token Length--- 9329\n",
      "Processing i 2817\n",
      "Token Length--- 925\n",
      "Processing i 2818\n",
      "Token Length--- 3652\n",
      "Processing i 2819\n",
      "Token Length--- 5787\n",
      "Processing i 2820\n",
      "Token Length--- 11457\n",
      "Processing i 2821\n",
      "Token Length--- 2080\n",
      "Processing i 2822\n",
      "Token Length--- 5165\n",
      "Processing i 2823\n",
      "Token Length--- 9173\n",
      "Processing i 2824\n",
      "Token Length--- 4840\n",
      "Processing i 2825\n",
      "Token Length--- 4862\n",
      "Processing i 2826\n",
      "Token Length--- 1595\n",
      "Processing i 2827\n",
      "Token Length--- 17821\n",
      "Processing i 2828\n",
      "Token Length--- 22294\n",
      "Processing i 2829\n",
      "Token Length--- 7809\n",
      "Processing i 2830\n",
      "Token Length--- 8894\n",
      "Processing i 2831\n",
      "Token Length--- 15972\n",
      "Processing i 2832\n",
      "Token Length--- 26061\n",
      "Processing i 2833\n",
      "Token Length--- 6736\n",
      "Processing i 2834\n",
      "Token Length--- 33254\n",
      "Processing i 2835\n",
      "Token Length--- 1716\n",
      "Processing i 2836\n",
      "Token Length--- 12762\n",
      "Processing i 2837\n",
      "Token Length--- 5191\n",
      "Processing i 2838\n",
      "Token Length--- 2253\n",
      "Processing i 2839\n",
      "Token Length--- 7478\n",
      "Processing i 2840\n",
      "Token Length--- 2926\n",
      "Processing i 2841\n",
      "Token Length--- 23527\n",
      "Processing i 2842\n",
      "Token Length--- 37050\n",
      "Processing i 2843\n",
      "Token Length--- 5815\n",
      "Processing i 2844\n",
      "Token Length--- 12150\n",
      "Processing i 2845\n",
      "Token Length--- 8756\n",
      "Processing i 2846\n",
      "Token Length--- 36028\n",
      "Processing i 2847\n",
      "Token Length--- 14381\n",
      "Processing i 2848\n",
      "Token Length--- 4470\n",
      "Processing i 2849\n",
      "Token Length--- 17662\n",
      "Processing i 2850\n",
      "Token Length--- 13048\n",
      "Processing i 2851\n",
      "Token Length--- 8478\n",
      "Processing i 2852\n",
      "Token Length--- 5502\n",
      "Processing i 2853\n",
      "Token Length--- 3246\n",
      "Processing i 2854\n",
      "Token Length--- 1872\n",
      "Processing i 2855\n",
      "Token Length--- 14287\n",
      "Processing i 2856\n",
      "Token Length--- 5276\n",
      "Processing i 2857\n",
      "Token Length--- 5231\n",
      "Processing i 2858\n",
      "Token Length--- 7860\n",
      "Processing i 2859\n",
      "Token Length--- 2969\n",
      "Processing i 2860\n",
      "Token Length--- 18703\n",
      "Processing i 2861\n",
      "Token Length--- 11558\n",
      "Processing i 2862\n",
      "Token Length--- 9700\n",
      "Processing i 2863\n",
      "Token Length--- 16387\n",
      "Processing i 2864\n",
      "Token Length--- 13167\n",
      "Processing i 2865\n",
      "Token Length--- 2994\n",
      "Processing i 2866\n",
      "Token Length--- 5049\n",
      "Processing i 2867\n",
      "Token Length--- 5223\n",
      "Processing i 2868\n",
      "Token Length--- 12551\n",
      "Processing i 2869\n",
      "Token Length--- 14234\n",
      "Processing i 2870\n",
      "Token Length--- 7669\n",
      "Processing i 2871\n",
      "Token Length--- 12541\n",
      "Processing i 2872\n",
      "Token Length--- 9956\n",
      "Processing i 2873\n",
      "Token Length--- 2859\n",
      "Processing i 2874\n",
      "Token Length--- 8608\n",
      "Processing i 2875\n",
      "Token Length--- 1827\n",
      "Processing i 2876\n",
      "Token Length--- 12031\n",
      "Processing i 2877\n",
      "Token Length--- 5652\n",
      "Processing i 2878\n",
      "Token Length--- 15171\n",
      "Processing i 2879\n",
      "Token Length--- 6180\n",
      "Processing i 2880\n",
      "Token Length--- 4857\n",
      "Processing i 2881\n",
      "Token Length--- 2043\n",
      "Processing i 2882\n",
      "Token Length--- 2029\n",
      "Processing i 2883\n",
      "Token Length--- 5652\n",
      "Processing i 2884\n",
      "Token Length--- 3077\n",
      "Processing i 2885\n",
      "Token Length--- 23090\n",
      "Processing i 2886\n",
      "Token Length--- 4774\n",
      "Processing i 2887\n",
      "Token Length--- 2527\n",
      "Processing i 2888\n",
      "Token Length--- 3504\n",
      "Processing i 2889\n",
      "Token Length--- 1878\n",
      "Processing i 2890\n",
      "Token Length--- 2416\n",
      "Processing i 2891\n",
      "Token Length--- 18250\n",
      "Processing i 2892\n",
      "Token Length--- 5519\n",
      "Processing i 2893\n",
      "Token Length--- 7005\n",
      "Processing i 2894\n",
      "Token Length--- 2263\n",
      "Processing i 2895\n",
      "Token Length--- 12152\n",
      "Processing i 2896\n",
      "Token Length--- 11888\n",
      "Processing i 2897\n",
      "Token Length--- 11300\n",
      "Processing i 2898\n",
      "Token Length--- 2409\n",
      "Processing i 2899\n",
      "Token Length--- 2018\n",
      "Processing i 2900\n",
      "Token Length--- 3277\n",
      "Processing i 2901\n",
      "Token Length--- 1218\n",
      "Processing i 2902\n",
      "Token Length--- 6495\n",
      "Processing i 2903\n",
      "Token Length--- 18496\n",
      "Processing i 2904\n",
      "Token Length--- 14751\n",
      "Processing i 2905\n",
      "Token Length--- 4160\n",
      "Processing i 2906\n",
      "Token Length--- 6364\n",
      "Processing i 2907\n",
      "Token Length--- 5434\n",
      "Processing i 2908\n",
      "Token Length--- 8515\n",
      "Processing i 2909\n",
      "Token Length--- 12071\n",
      "Processing i 2910\n",
      "Token Length--- 5485\n",
      "Processing i 2911\n",
      "Token Length--- 27938\n",
      "Processing i 2912\n",
      "Token Length--- 49867\n",
      "Processing i 2913\n",
      "Token Length--- 3284\n",
      "Processing i 2914\n",
      "Token Length--- 12524\n",
      "Processing i 2915\n",
      "Token Length--- 6547\n",
      "Processing i 2916\n",
      "Token Length--- 1028\n",
      "Processing i 2917\n",
      "Token Length--- 3680\n",
      "Processing i 2918\n",
      "Token Length--- 3458\n",
      "Processing i 2919\n",
      "Token Length--- 10031\n",
      "Processing i 2920\n",
      "Token Length--- 3349\n",
      "Processing i 2921\n",
      "Token Length--- 21218\n",
      "Processing i 2922\n",
      "Token Length--- 8038\n",
      "Processing i 2923\n",
      "Token Length--- 856\n",
      "Processing i 2924\n",
      "Token Length--- 11007\n",
      "Processing i 2925\n",
      "Token Length--- 10558\n",
      "Processing i 2926\n",
      "Token Length--- 14830\n",
      "Processing i 2927\n",
      "Token Length--- 3327\n",
      "Processing i 2928\n",
      "Token Length--- 360\n",
      "Processing i 2929\n",
      "Token Length--- 1273\n",
      "Processing i 2930\n",
      "Token Length--- 18924\n",
      "Processing i 2931\n",
      "Token Length--- 25286\n",
      "Processing i 2932\n",
      "Token Length--- 24922\n",
      "Processing i 2933\n",
      "Token Length--- 3775\n",
      "Processing i 2934\n",
      "Token Length--- 19457\n",
      "Processing i 2935\n",
      "Token Length--- 4616\n",
      "Processing i 2936\n",
      "Token Length--- 10499\n",
      "Processing i 2937\n",
      "Token Length--- 7460\n",
      "Processing i 2938\n",
      "Token Length--- 13810\n",
      "Processing i 2939\n",
      "Token Length--- 16280\n",
      "Processing i 2940\n",
      "Token Length--- 6629\n",
      "Processing i 2941\n",
      "Token Length--- 4803\n",
      "Processing i 2942\n",
      "Token Length--- 11942\n",
      "Processing i 2943\n",
      "Token Length--- 1362\n",
      "Processing i 2944\n",
      "Token Length--- 9433\n",
      "Processing i 2945\n",
      "Token Length--- 8334\n",
      "Processing i 2946\n",
      "Token Length--- 5912\n",
      "Processing i 2947\n",
      "Token Length--- 71339\n",
      "Processing i 2948\n",
      "Token Length--- 36052\n",
      "Processing i 2949\n",
      "Token Length--- 28779\n",
      "Processing i 2950\n",
      "Token Length--- 2701\n",
      "Processing i 2951\n",
      "Token Length--- 4669\n",
      "Processing i 2952\n",
      "Token Length--- 31637\n",
      "Processing i 2953\n",
      "Token Length--- 15828\n",
      "Processing i 2954\n",
      "Token Length--- 2848\n",
      "Processing i 2955\n",
      "Token Length--- 1649\n",
      "Processing i 2956\n",
      "Token Length--- 19671\n",
      "Processing i 2957\n",
      "Token Length--- 14894\n",
      "Processing i 2958\n",
      "Token Length--- 10172\n",
      "Processing i 2959\n",
      "Token Length--- 6801\n",
      "Processing i 2960\n",
      "Token Length--- 1991\n",
      "Processing i 2961\n",
      "Token Length--- 7375\n",
      "Processing i 2962\n",
      "Token Length--- 2935\n",
      "Processing i 2963\n",
      "Token Length--- 4686\n",
      "Processing i 2964\n",
      "Token Length--- 2617\n",
      "Processing i 2965\n",
      "Token Length--- 34047\n",
      "Processing i 2966\n",
      "Token Length--- 17495\n",
      "Processing i 2967\n",
      "Token Length--- 1070\n",
      "Processing i 2968\n",
      "Token Length--- 3614\n",
      "Processing i 2969\n",
      "Token Length--- 25936\n",
      "Processing i 2970\n",
      "Token Length--- 3362\n",
      "Processing i 2971\n",
      "Token Length--- 4090\n",
      "Processing i 2972\n",
      "Token Length--- 10939\n",
      "Processing i 2973\n",
      "Token Length--- 9971\n",
      "Processing i 2974\n",
      "Token Length--- 16321\n",
      "Processing i 2975\n",
      "Token Length--- 15765\n",
      "Processing i 2976\n",
      "Token Length--- 4616\n",
      "Processing i 2977\n",
      "Token Length--- 1998\n",
      "Processing i 2978\n",
      "Token Length--- 3112\n",
      "Processing i 2979\n",
      "Token Length--- 6745\n",
      "Processing i 2980\n",
      "Token Length--- 6248\n",
      "Processing i 2981\n",
      "Token Length--- 5192\n",
      "Processing i 2982\n",
      "Token Length--- 5594\n",
      "Processing i 2983\n",
      "Token Length--- 1763\n",
      "Processing i 2984\n",
      "Token Length--- 18606\n",
      "Processing i 2985\n",
      "Token Length--- 6570\n",
      "Processing i 2986\n",
      "Token Length--- 6868\n",
      "Processing i 2987\n",
      "Token Length--- 22370\n",
      "Processing i 2988\n",
      "Token Length--- 37501\n",
      "Processing i 2989\n",
      "Token Length--- 4478\n",
      "Processing i 2990\n",
      "Token Length--- 9375\n",
      "Processing i 2991\n",
      "Token Length--- 23886\n",
      "Processing i 2992\n",
      "Token Length--- 7791\n",
      "Processing i 2993\n",
      "Token Length--- 2997\n",
      "Processing i 2994\n",
      "Token Length--- 5102\n",
      "Processing i 2995\n",
      "Token Length--- 20157\n",
      "Processing i 2996\n",
      "Token Length--- 2625\n",
      "Processing i 2997\n",
      "Token Length--- 19983\n",
      "Processing i 2998\n",
      "Token Length--- 1504\n",
      "Processing i 2999\n",
      "Token Length--- 3115\n",
      "Processing i 3000\n",
      "Token Length--- 17576\n",
      "Processing i 3001\n",
      "Token Length--- 31121\n",
      "Processing i 3002\n",
      "Token Length--- 20856\n",
      "Processing i 3003\n",
      "Token Length--- 2447\n",
      "Processing i 3004\n",
      "Token Length--- 4246\n",
      "Processing i 3005\n",
      "Token Length--- 3072\n",
      "Processing i 3006\n",
      "Token Length--- 8485\n",
      "Processing i 3007\n",
      "Token Length--- 2666\n",
      "Processing i 3008\n",
      "Token Length--- 6395\n",
      "Processing i 3009\n",
      "Token Length--- 13438\n",
      "Processing i 3010\n",
      "Token Length--- 2612\n",
      "Processing i 3011\n",
      "Token Length--- 10708\n",
      "Processing i 3012\n",
      "Token Length--- 16151\n",
      "Processing i 3013\n",
      "Token Length--- 1593\n",
      "Processing i 3014\n",
      "Token Length--- 20125\n",
      "Processing i 3015\n",
      "Token Length--- 2421\n",
      "Processing i 3016\n",
      "Token Length--- 4997\n",
      "Processing i 3017\n",
      "Token Length--- 973\n",
      "Processing i 3018\n",
      "Token Length--- 23488\n",
      "Processing i 3019\n",
      "Token Length--- 3629\n",
      "Processing i 3020\n",
      "Token Length--- 27246\n",
      "Processing i 3021\n",
      "Token Length--- 12536\n",
      "Processing i 3022\n",
      "Token Length--- 2799\n",
      "Processing i 3023\n",
      "Token Length--- 14906\n",
      "Processing i 3024\n",
      "Token Length--- 4412\n",
      "Processing i 3025\n",
      "Token Length--- 3999\n",
      "Processing i 3026\n",
      "Token Length--- 4488\n",
      "Processing i 3027\n",
      "Token Length--- 17479\n",
      "Processing i 3028\n",
      "Token Length--- 6616\n",
      "Processing i 3029\n",
      "Token Length--- 7318\n",
      "Processing i 3030\n",
      "Token Length--- 4148\n",
      "Processing i 3031\n",
      "Token Length--- 13317\n",
      "Processing i 3032\n",
      "Token Length--- 801\n",
      "Processing i 3033\n",
      "Token Length--- 3778\n",
      "Processing i 3034\n",
      "Token Length--- 6804\n",
      "Processing i 3035\n",
      "Token Length--- 890\n",
      "Processing i 3036\n",
      "Token Length--- 3036\n",
      "Processing i 3037\n",
      "Token Length--- 6367\n",
      "Processing i 3038\n",
      "Token Length--- 4726\n",
      "Processing i 3039\n",
      "Token Length--- 2830\n",
      "Processing i 3040\n",
      "Token Length--- 14125\n",
      "Processing i 3041\n",
      "Token Length--- 2744\n",
      "Processing i 3042\n",
      "Token Length--- 4175\n",
      "Processing i 3043\n",
      "Token Length--- 1690\n",
      "Processing i 3044\n",
      "Token Length--- 2649\n",
      "Processing i 3045\n",
      "Token Length--- 2759\n",
      "Processing i 3046\n",
      "Token Length--- 8503\n",
      "Processing i 3047\n",
      "Token Length--- 10222\n",
      "Processing i 3048\n",
      "Token Length--- 11846\n",
      "Processing i 3049\n",
      "Token Length--- 9914\n",
      "Processing i 3050\n",
      "Token Length--- 2822\n",
      "Processing i 3051\n",
      "Token Length--- 1005\n",
      "Processing i 3052\n",
      "Token Length--- 3823\n",
      "Processing i 3053\n",
      "Token Length--- 8420\n",
      "Processing i 3054\n",
      "Token Length--- 3708\n",
      "Processing i 3055\n",
      "Token Length--- 16171\n",
      "Processing i 3056\n",
      "Token Length--- 24892\n",
      "Processing i 3057\n",
      "Token Length--- 6651\n",
      "Processing i 3058\n",
      "Token Length--- 12400\n",
      "Processing i 3059\n",
      "Token Length--- 17204\n",
      "Processing i 3060\n",
      "Token Length--- 17835\n",
      "Processing i 3061\n",
      "Token Length--- 15846\n",
      "Processing i 3062\n",
      "Token Length--- 2229\n",
      "Processing i 3063\n",
      "Token Length--- 4338\n",
      "Processing i 3064\n",
      "Token Length--- 25156\n",
      "Processing i 3065\n",
      "Token Length--- 5641\n",
      "Processing i 3066\n",
      "Token Length--- 1601\n",
      "Processing i 3067\n",
      "Token Length--- 3102\n",
      "Processing i 3068\n",
      "Token Length--- 1943\n",
      "Processing i 3069\n",
      "Token Length--- 21076\n",
      "Processing i 3070\n",
      "Token Length--- 7259\n",
      "Processing i 3071\n",
      "Token Length--- 4766\n",
      "Processing i 3072\n",
      "Token Length--- 1971\n",
      "Processing i 3073\n",
      "Token Length--- 7636\n",
      "Processing i 3074\n",
      "Token Length--- 4212\n",
      "Processing i 3075\n",
      "Token Length--- 8396\n",
      "Processing i 3076\n",
      "Token Length--- 6109\n",
      "Processing i 3077\n",
      "Token Length--- 12793\n",
      "Processing i 3078\n",
      "Token Length--- 5638\n",
      "Processing i 3079\n",
      "Token Length--- 8583\n",
      "Processing i 3080\n",
      "Token Length--- 11961\n",
      "Processing i 3081\n",
      "Token Length--- 18074\n",
      "Processing i 3082\n",
      "Token Length--- 3151\n",
      "Processing i 3083\n",
      "Token Length--- 16797\n",
      "Processing i 3084\n",
      "Token Length--- 4365\n",
      "Processing i 3085\n",
      "Token Length--- 19974\n",
      "Processing i 3086\n",
      "Token Length--- 3654\n",
      "Processing i 3087\n",
      "Token Length--- 4452\n",
      "Processing i 3088\n",
      "Token Length--- 1142\n",
      "Processing i 3089\n",
      "Token Length--- 3982\n",
      "Processing i 3090\n",
      "Token Length--- 6558\n",
      "Processing i 3091\n",
      "Token Length--- 11302\n",
      "Processing i 3092\n",
      "Token Length--- 2606\n",
      "Processing i 3093\n",
      "Token Length--- 2498\n",
      "Processing i 3094\n",
      "Token Length--- 19118\n",
      "Processing i 3095\n",
      "Token Length--- 9326\n",
      "Processing i 3096\n",
      "Token Length--- 18950\n",
      "Processing i 3097\n",
      "Token Length--- 8898\n",
      "Processing i 3098\n",
      "Token Length--- 17102\n",
      "Processing i 3099\n",
      "Token Length--- 11347\n",
      "Processing i 3100\n",
      "Token Length--- 1906\n",
      "Processing i 3101\n",
      "Token Length--- 4051\n",
      "Processing i 3102\n",
      "Token Length--- 44812\n",
      "Processing i 3103\n",
      "Token Length--- 22442\n",
      "Processing i 3104\n",
      "Token Length--- 21747\n",
      "Processing i 3105\n",
      "Token Length--- 4441\n",
      "Processing i 3106\n",
      "Token Length--- 3262\n",
      "Processing i 3107\n",
      "Token Length--- 1190\n",
      "Processing i 3108\n",
      "Token Length--- 7652\n",
      "Processing i 3109\n",
      "Token Length--- 6634\n",
      "Processing i 3110\n",
      "Token Length--- 3287\n",
      "Processing i 3111\n",
      "Token Length--- 16906\n",
      "Processing i 3112\n",
      "Token Length--- 7555\n",
      "Processing i 3113\n",
      "Token Length--- 10548\n",
      "Processing i 3114\n",
      "Token Length--- 2625\n",
      "Processing i 3115\n",
      "Token Length--- 23027\n",
      "Processing i 3116\n",
      "Token Length--- 764\n",
      "Processing i 3117\n",
      "Token Length--- 2959\n",
      "Processing i 3118\n",
      "Token Length--- 22941\n",
      "Processing i 3119\n",
      "Token Length--- 2859\n",
      "Processing i 3120\n",
      "Token Length--- 4144\n",
      "Processing i 3121\n",
      "Token Length--- 15217\n",
      "Processing i 3122\n",
      "Token Length--- 5888\n",
      "Processing i 3123\n",
      "Token Length--- 20632\n",
      "Processing i 3124\n",
      "Token Length--- 30036\n",
      "Processing i 3125\n",
      "Token Length--- 3447\n",
      "Processing i 3126\n",
      "Token Length--- 4808\n",
      "Processing i 3127\n",
      "Token Length--- 1155\n",
      "Processing i 3128\n",
      "Token Length--- 4465\n",
      "Processing i 3129\n",
      "Token Length--- 40743\n",
      "Processing i 3130\n",
      "Token Length--- 16820\n",
      "Processing i 3131\n",
      "Token Length--- 938\n",
      "Processing i 3132\n",
      "Token Length--- 2901\n",
      "Processing i 3133\n",
      "Token Length--- 5170\n",
      "Processing i 3134\n",
      "Token Length--- 5996\n",
      "Processing i 3135\n",
      "Token Length--- 7326\n",
      "Processing i 3136\n",
      "Token Length--- 12196\n",
      "Processing i 3137\n",
      "Token Length--- 17813\n",
      "Processing i 3138\n",
      "Token Length--- 4296\n",
      "Processing i 3139\n",
      "Token Length--- 17163\n",
      "Processing i 3140\n",
      "Token Length--- 8572\n",
      "Processing i 3141\n",
      "Token Length--- 1749\n",
      "Processing i 3142\n",
      "Token Length--- 10057\n",
      "Processing i 3143\n",
      "Token Length--- 3911\n",
      "Processing i 3144\n",
      "Token Length--- 19128\n",
      "Processing i 3145\n",
      "Token Length--- 20726\n",
      "Processing i 3146\n",
      "Token Length--- 4686\n",
      "Processing i 3147\n",
      "Token Length--- 5964\n",
      "Processing i 3148\n",
      "Token Length--- 7215\n",
      "Processing i 3149\n",
      "Token Length--- 1386\n",
      "Processing i 3150\n",
      "Token Length--- 20756\n",
      "Processing i 3151\n",
      "Token Length--- 3805\n",
      "Processing i 3152\n",
      "Token Length--- 3749\n",
      "Processing i 3153\n",
      "Token Length--- 424\n",
      "Processing i 3154\n",
      "Token Length--- 24174\n",
      "Processing i 3155\n",
      "Token Length--- 11859\n",
      "Processing i 3156\n",
      "Token Length--- 3548\n",
      "Processing i 3157\n",
      "Token Length--- 1664\n",
      "Processing i 3158\n",
      "Token Length--- 10690\n",
      "Processing i 3159\n",
      "Token Length--- 6568\n",
      "Processing i 3160\n",
      "Token Length--- 2436\n",
      "Processing i 3161\n",
      "Token Length--- 3653\n",
      "Processing i 3162\n",
      "Token Length--- 13472\n",
      "Processing i 3163\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Token Length--- 20297\n",
      "Processing i 3164\n",
      "Token Length--- 5234\n",
      "Processing i 3165\n",
      "Token Length--- 7206\n",
      "Processing i 3166\n",
      "Token Length--- 15648\n",
      "Processing i 3167\n",
      "Token Length--- 5353\n",
      "Processing i 3168\n",
      "Token Length--- 7470\n",
      "Processing i 3169\n",
      "Token Length--- 3600\n",
      "Processing i 3170\n",
      "Token Length--- 26281\n",
      "Processing i 3171\n",
      "Token Length--- 1591\n",
      "Processing i 3172\n",
      "Token Length--- 8973\n",
      "Processing i 3173\n",
      "Token Length--- 29054\n",
      "Processing i 3174\n",
      "Token Length--- 3188\n",
      "Processing i 3175\n",
      "Token Length--- 4321\n",
      "Processing i 3176\n",
      "Token Length--- 2238\n",
      "Processing i 3177\n",
      "Token Length--- 14741\n",
      "Processing i 3178\n",
      "Token Length--- 5649\n",
      "Processing i 3179\n",
      "Token Length--- 3037\n",
      "Processing i 3180\n",
      "Token Length--- 7451\n",
      "Processing i 3181\n",
      "Token Length--- 2345\n",
      "Processing i 3182\n",
      "Token Length--- 14890\n",
      "Processing i 3183\n",
      "Token Length--- 2092\n",
      "Processing i 3184\n",
      "Token Length--- 1744\n",
      "Processing i 3185\n",
      "Token Length--- 6991\n",
      "Processing i 3186\n",
      "Token Length--- 5669\n",
      "Processing i 3187\n",
      "Token Length--- 30723\n",
      "Processing i 3188\n",
      "Token Length--- 19340\n",
      "Processing i 3189\n",
      "Token Length--- 1386\n",
      "Processing i 3190\n",
      "Token Length--- 1022\n",
      "Processing i 3191\n",
      "Token Length--- 1310\n",
      "Processing i 3192\n",
      "Token Length--- 3031\n",
      "Processing i 3193\n",
      "Token Length--- 40452\n",
      "Processing i 3194\n",
      "Token Length--- 24315\n",
      "Processing i 3195\n",
      "Token Length--- 4987\n",
      "Processing i 3196\n",
      "Token Length--- 8778\n",
      "Processing i 3197\n",
      "Token Length--- 1643\n",
      "Processing i 3198\n",
      "Token Length--- 1921\n",
      "Processing i 3199\n",
      "Token Length--- 1681\n",
      "Processing i 3200\n",
      "Token Length--- 9237\n",
      "Processing i 3201\n",
      "Token Length--- 6115\n",
      "Processing i 3202\n",
      "Token Length--- 5471\n",
      "Processing i 3203\n",
      "Token Length--- 781\n",
      "Processing i 3204\n",
      "Token Length--- 12427\n",
      "Processing i 3205\n",
      "Token Length--- 17302\n",
      "Processing i 3206\n",
      "Token Length--- 31049\n",
      "Processing i 3207\n",
      "Token Length--- 20565\n",
      "Processing i 3208\n",
      "Token Length--- 12028\n",
      "Processing i 3209\n",
      "Token Length--- 18018\n",
      "Processing i 3210\n",
      "Token Length--- 27834\n",
      "Processing i 3211\n",
      "Token Length--- 910\n",
      "Processing i 3212\n",
      "Token Length--- 7568\n",
      "Processing i 3213\n",
      "Token Length--- 18441\n",
      "Processing i 3214\n",
      "Token Length--- 22777\n",
      "Processing i 3215\n",
      "Token Length--- 28192\n",
      "Processing i 3216\n",
      "Token Length--- 19314\n",
      "Processing i 3217\n",
      "Token Length--- 6886\n",
      "Processing i 3218\n",
      "Token Length--- 15556\n",
      "Processing i 3219\n",
      "Token Length--- 6665\n",
      "Processing i 3220\n",
      "Token Length--- 11415\n",
      "Processing i 3221\n",
      "Token Length--- 2984\n",
      "Processing i 3222\n",
      "Token Length--- 1768\n",
      "Processing i 3223\n",
      "Token Length--- 4864\n",
      "Processing i 3224\n",
      "Token Length--- 700\n",
      "Processing i 3225\n",
      "Token Length--- 10499\n",
      "Processing i 3226\n",
      "Token Length--- 3367\n",
      "Processing i 3227\n",
      "Token Length--- 8989\n",
      "Processing i 3228\n",
      "Token Length--- 15643\n",
      "Processing i 3229\n",
      "Token Length--- 1335\n",
      "Processing i 3230\n",
      "Token Length--- 1735\n",
      "Processing i 3231\n",
      "Token Length--- 10951\n",
      "Processing i 3232\n",
      "Token Length--- 4220\n",
      "Processing i 3233\n",
      "Token Length--- 27108\n",
      "Processing i 3234\n",
      "Token Length--- 20933\n",
      "Processing i 3235\n",
      "Token Length--- 1232\n",
      "Processing i 3236\n",
      "Token Length--- 5709\n",
      "Processing i 3237\n",
      "Token Length--- 3297\n",
      "Processing i 3238\n",
      "Token Length--- 17742\n",
      "Processing i 3239\n",
      "Token Length--- 11323\n",
      "Processing i 3240\n",
      "Token Length--- 6962\n",
      "Processing i 3241\n",
      "Token Length--- 1752\n",
      "Processing i 3242\n",
      "Token Length--- 3808\n",
      "Processing i 3243\n",
      "Token Length--- 6472\n",
      "Processing i 3244\n",
      "Token Length--- 8731\n",
      "Processing i 3245\n",
      "Token Length--- 5116\n",
      "Processing i 3246\n",
      "Token Length--- 295\n",
      "Processing i 3247\n",
      "Token Length--- 4743\n",
      "Processing i 3248\n",
      "Token Length--- 6867\n",
      "Processing i 3249\n",
      "Token Length--- 3305\n",
      "Processing i 3250\n",
      "Token Length--- 1808\n",
      "Processing i 3251\n",
      "Token Length--- 18830\n",
      "Processing i 3252\n",
      "Token Length--- 13094\n",
      "Processing i 3253\n",
      "Token Length--- 5759\n",
      "Processing i 3254\n",
      "Token Length--- 1172\n",
      "Processing i 3255\n",
      "Token Length--- 3514\n",
      "Processing i 3256\n",
      "Token Length--- 620\n",
      "Processing i 3257\n",
      "Token Length--- 7740\n",
      "Processing i 3258\n",
      "Token Length--- 2944\n",
      "Processing i 3259\n",
      "Token Length--- 2615\n",
      "Processing i 3260\n",
      "Token Length--- 3905\n",
      "Processing i 3261\n",
      "Token Length--- 1619\n",
      "Processing i 3262\n",
      "Token Length--- 14452\n",
      "Processing i 3263\n",
      "Token Length--- 6429\n",
      "Processing i 3264\n",
      "Token Length--- 10004\n",
      "Processing i 3265\n",
      "Token Length--- 4886\n",
      "Processing i 3266\n",
      "Token Length--- 18305\n",
      "Processing i 3267\n",
      "Token Length--- 20422\n",
      "Processing i 3268\n",
      "Token Length--- 6998\n",
      "Processing i 3269\n",
      "Token Length--- 9948\n",
      "Processing i 3270\n",
      "Token Length--- 6018\n",
      "Processing i 3271\n",
      "Token Length--- 14291\n",
      "Processing i 3272\n",
      "Token Length--- 23699\n",
      "Processing i 3273\n",
      "Token Length--- 10454\n",
      "Processing i 3274\n",
      "Token Length--- 1495\n",
      "Processing i 3275\n",
      "Token Length--- 9037\n",
      "Processing i 3276\n",
      "Token Length--- 54987\n",
      "Processing i 3277\n",
      "Token Length--- 11520\n",
      "Processing i 3278\n",
      "Token Length--- 1045\n",
      "Processing i 3279\n",
      "Token Length--- 6027\n",
      "Processing i 3280\n",
      "Token Length--- 9390\n",
      "Processing i 3281\n",
      "Token Length--- 9242\n",
      "Processing i 3282\n",
      "Token Length--- 19476\n",
      "Processing i 3283\n",
      "Token Length--- 3906\n",
      "Processing i 3284\n",
      "Token Length--- 16278\n",
      "Processing i 3285\n",
      "Token Length--- 5455\n",
      "Processing i 3286\n",
      "Token Length--- 2190\n",
      "Processing i 3287\n",
      "Token Length--- 1792\n",
      "Processing i 3288\n",
      "Token Length--- 17128\n",
      "Processing i 3289\n",
      "Token Length--- 4999\n",
      "Processing i 3290\n",
      "Token Length--- 4720\n",
      "Processing i 3291\n",
      "Token Length--- 13858\n",
      "Processing i 3292\n",
      "Token Length--- 520\n",
      "Processing i 3293\n",
      "Token Length--- 5950\n",
      "Processing i 3294\n",
      "Token Length--- 4654\n",
      "Processing i 3295\n",
      "Token Length--- 2486\n",
      "Processing i 3296\n",
      "Token Length--- 1630\n",
      "Processing i 3297\n",
      "Token Length--- 503\n",
      "Processing i 3298\n",
      "Token Length--- 1832\n",
      "Processing i 3299\n",
      "Token Length--- 22806\n",
      "Processing i 3300\n",
      "Token Length--- 15446\n",
      "Processing i 3301\n",
      "Token Length--- 2499\n",
      "Processing i 3302\n",
      "Token Length--- 37605\n",
      "Processing i 3303\n",
      "Token Length--- 1769\n",
      "Processing i 3304\n",
      "Token Length--- 2448\n",
      "Processing i 3305\n",
      "Token Length--- 3334\n",
      "Processing i 3306\n",
      "Token Length--- 3247\n",
      "Processing i 3307\n",
      "Token Length--- 11558\n",
      "Processing i 3308\n",
      "Token Length--- 14581\n",
      "Processing i 3309\n",
      "Token Length--- 1515\n",
      "Processing i 3310\n",
      "Token Length--- 5065\n",
      "Processing i 3311\n",
      "Token Length--- 375\n",
      "Processing i 3312\n",
      "Token Length--- 5844\n",
      "Processing i 3313\n",
      "Token Length--- 16083\n",
      "Processing i 3314\n",
      "Token Length--- 2065\n",
      "Processing i 3315\n",
      "Token Length--- 2250\n",
      "Processing i 3316\n",
      "Token Length--- 2138\n",
      "Processing i 3317\n",
      "Token Length--- 20625\n",
      "Processing i 3318\n",
      "Token Length--- 7588\n",
      "Processing i 3319\n",
      "Token Length--- 13265\n",
      "Processing i 3320\n",
      "Token Length--- 8531\n",
      "Processing i 3321\n",
      "Token Length--- 5739\n",
      "Processing i 3322\n",
      "Token Length--- 2028\n",
      "Processing i 3323\n",
      "Token Length--- 2842\n",
      "Processing i 3324\n",
      "Token Length--- 2163\n",
      "Processing i 3325\n",
      "Token Length--- 8927\n",
      "Processing i 3326\n",
      "Token Length--- 6592\n",
      "Processing i 3327\n",
      "Token Length--- 2358\n",
      "Processing i 3328\n",
      "Token Length--- 5094\n",
      "Processing i 3329\n",
      "Token Length--- 6474\n",
      "Processing i 3330\n",
      "Token Length--- 4434\n",
      "Processing i 3331\n",
      "Token Length--- 20938\n",
      "Processing i 3332\n",
      "Token Length--- 7254\n",
      "Processing i 3333\n",
      "Token Length--- 3197\n",
      "Processing i 3334\n",
      "Token Length--- 9922\n",
      "Processing i 3335\n",
      "Token Length--- 1222\n",
      "Processing i 3336\n",
      "Token Length--- 14851\n",
      "Processing i 3337\n",
      "Token Length--- 3362\n",
      "Processing i 3338\n",
      "Token Length--- 15850\n",
      "Processing i 3339\n",
      "Token Length--- 31909\n",
      "Processing i 3340\n",
      "Token Length--- 2350\n",
      "Processing i 3341\n",
      "Token Length--- 547\n",
      "Processing i 3342\n",
      "Token Length--- 10579\n",
      "Processing i 3343\n",
      "Token Length--- 6799\n",
      "Processing i 3344\n",
      "Token Length--- 7507\n",
      "Processing i 3345\n",
      "Token Length--- 23560\n",
      "Processing i 3346\n",
      "Token Length--- 3461\n",
      "Processing i 3347\n",
      "Token Length--- 4263\n",
      "Processing i 3348\n",
      "Token Length--- 23806\n",
      "Processing i 3349\n",
      "Token Length--- 26343\n",
      "Processing i 3350\n",
      "Token Length--- 2501\n",
      "Processing i 3351\n",
      "Token Length--- 6606\n",
      "Processing i 3352\n",
      "Token Length--- 22077\n",
      "Processing i 3353\n",
      "Token Length--- 5965\n",
      "Processing i 3354\n",
      "Token Length--- 10052\n",
      "Processing i 3355\n",
      "Token Length--- 5970\n",
      "Processing i 3356\n",
      "Token Length--- 14605\n",
      "Processing i 3357\n",
      "Token Length--- 9254\n",
      "Processing i 3358\n",
      "Token Length--- 14864\n",
      "Processing i 3359\n",
      "Token Length--- 6676\n",
      "Processing i 3360\n",
      "Token Length--- 2814\n",
      "Processing i 3361\n",
      "Token Length--- 6173\n",
      "Processing i 3362\n",
      "Token Length--- 18498\n",
      "Processing i 3363\n",
      "Token Length--- 864\n",
      "Processing i 3364\n",
      "Token Length--- 15142\n",
      "Processing i 3365\n",
      "Token Length--- 2812\n",
      "Processing i 3366\n",
      "Token Length--- 585\n",
      "Processing i 3367\n",
      "Token Length--- 2409\n"
     ]
    }
   ],
   "source": [
    "# Split every document into fixed-size windows of whitespace-separated tokens.\n",
    "# NOTE(review): the original cell stopped mid-statement; non-overlapping 512-token\n",
    "# windows are assumed to be the intended chunking -- confirm before relying on it.\n",
    "CHUNK_SIZE = 512  # tokens per window\n",
    "doc_chunks = []   # one dict per (document, window)\n",
    "\n",
    "# enumerate() supplies the 1-based progress counter so no loop below can clobber it.\n",
    "for i, (index, row) in enumerate(df_docs.iterrows(), start=1):\n",
    "    print(\"Processing i\", i)\n",
    "    id_ = row['id']\n",
    "    text = row['text']\n",
    "    title = row['title']\n",
    "\n",
    "    token_array = text.split(\" \")  # tokenise once and reuse for the length report\n",
    "    print(\"Token Length---\", len(token_array))\n",
    "\n",
    "    # Dedicated window variable: the original inner loop reused `i`, overwriting the counter.\n",
    "    for start in range(0, len(token_array), CHUNK_SIZE):\n",
    "        end = min(start + CHUNK_SIZE, len(token_array))  # last window may be shorter\n",
    "        context = \" \".join(token_array[start:end])\n",
    "        doc_chunks.append({'id': id_, 'title': title, 'start': start, 'end': end, 'context': context})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d205239a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "a9bd1c7b",
   "metadata": {},
   "source": [
    "### 2. LongNQ-docs Dataset Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "105b7d31",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder holding the LongNQ questions.tsv and docs.tsv files (absolute, machine-specific path).\n",
    "doc_folder_long = '/Users/abhilashamangal/Documents/Semantic Search/data/LongNQ-docs/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "5cf0a00b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "questions.tsv\n",
      "docs.tsv\n"
     ]
    }
   ],
   "source": [
    "# get_all_files() chdir's into the folder, prints each entry and returns '<folder>/<entry>' paths.\n",
    "files_nq = get_all_files(doc_folder_long)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "5df87779",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick questions.tsv by name: os.listdir() returns entries in arbitrary order, so\n",
    "# position 0 of files_nq is not guaranteed to be the questions file.\n",
    "questions_path_long = next(p for p in files_nq if os.path.basename(p) == 'questions.tsv')\n",
    "df_question_long = pd.read_csv(questions_path_long, sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "aaa1095f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>relevant</th>\n",
       "      <th>answers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>when is the segment making a contribution to t...</td>\n",
       "      <td>-1</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>who plays tim allen's wife in the santa clause</td>\n",
       "      <td>-1</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>when is the silver chair narnia movie coming out</td>\n",
       "      <td>-1</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>tom petty don't come around here no more meaning</td>\n",
       "      <td>79</td>\n",
       "      <td>Stevie Nicks had broken up with Eagles guitari...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>list of celebrities who have a hollywood star</td>\n",
       "      <td>-1</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                               text  relevant  \\\n",
       "0   1  when is the segment making a contribution to t...        -1   \n",
       "1   2     who plays tim allen's wife in the santa clause        -1   \n",
       "2   3   when is the silver chair narnia movie coming out        -1   \n",
       "3   4   tom petty don't come around here no more meaning        79   \n",
       "4   5      list of celebrities who have a hollywood star        -1   \n",
       "\n",
       "                                             answers  \n",
       "0                                                  -  \n",
       "1                                                  -  \n",
       "2                                                  -  \n",
       "3  Stevie Nicks had broken up with Eagles guitari...  \n",
       "4                                                  -  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five question rows.\n",
    "df_question_long.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "fc8e3e85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "600"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of question rows before filtering.\n",
    "df_question_long.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "5f8e3886",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "2190dde8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mark the '-' placeholder in `answers` as missing. The result is assigned back to the\n",
    "# column: replace(..., inplace=True) on a column selection is chained assignment, which\n",
    "# newer pandas warns about and may not write through to the DataFrame.\n",
    "df_question_long['answers'] = df_question_long['answers'].replace('-', np.nan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "6cffb316",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "300"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the questions whose answer is missing.\n",
    "df_question_long['answers'].isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "4aaee894",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove, in place, every row whose `answers` value is missing.\n",
    "df_question_long.dropna(axis='index', subset=['answers'], inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "2ab8f3d1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "300"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of question rows left after dropping the unanswered ones.\n",
    "df_question_long.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "9e07fc59",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>relevant</th>\n",
       "      <th>answers</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>tom petty don't come around here no more meaning</td>\n",
       "      <td>79</td>\n",
       "      <td>Stevie Nicks had broken up with Eagles guitari...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>what is the youngest age you can marry in the usa</td>\n",
       "      <td>2096</td>\n",
       "      <td>Unlike most Western countries , 18 of the U.S....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>which industry provides the largest employment...</td>\n",
       "      <td>2562</td>\n",
       "      <td>The IT industry continues to be the largest pr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>when do they throw fish at pike place market</td>\n",
       "      <td>1197</td>\n",
       "      <td>A typical routine will involve a customer orde...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>who failed the boards in grey's anatomy</td>\n",
       "      <td>593</td>\n",
       "      <td>At the end of their fifth year residency, the ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                               text  relevant  \\\n",
       "3   4   tom petty don't come around here no more meaning        79   \n",
       "5   6  what is the youngest age you can marry in the usa      2096   \n",
       "7   8  which industry provides the largest employment...      2562   \n",
       "8   9       when do they throw fish at pike place market      1197   \n",
       "9  10            who failed the boards in grey's anatomy       593   \n",
       "\n",
       "                                             answers  \n",
       "3  Stevie Nicks had broken up with Eagles guitari...  \n",
       "5  Unlike most Western countries , 18 of the U.S....  \n",
       "7  The IT industry continues to be the largest pr...  \n",
       "8  A typical routine will involve a customer orde...  \n",
       "9  At the end of their fifth year residency, the ...  "
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_question_long.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b03c4d78",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "66e9c620",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated documents table.\n",
    "# NOTE(review): files_nq[1] is a positional pick; presumably it is the docs file - confirm against the listing.\n",
    "df_docs_long = pd.read_csv(files_nq[1], delimiter='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "fb4bd56f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>text</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>United States courts of appeals - wikipedia &lt;H...</td>\n",
       "      <td>United States courts of appeals</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Three - point field goal - wikipedia &lt;H1&gt; Thre...</td>\n",
       "      <td>Three - point field goal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Super Bowl LII - wikipedia &lt;H1&gt; Super Bowl LII...</td>\n",
       "      <td>Super Bowl LII</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>National debt of the United States - Wikipedia...</td>\n",
       "      <td>National debt of the United States</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Gun laws in Texas - wikipedia &lt;H1&gt; Gun laws in...</td>\n",
       "      <td>Gun laws in Texas</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                               text  \\\n",
       "0   1  United States courts of appeals - wikipedia <H...   \n",
       "1   2  Three - point field goal - wikipedia <H1> Thre...   \n",
       "2   3  Super Bowl LII - wikipedia <H1> Super Bowl LII...   \n",
       "3   4  National debt of the United States - Wikipedia...   \n",
       "4   5  Gun laws in Texas - wikipedia <H1> Gun laws in...   \n",
       "\n",
       "                                title  \n",
       "0     United States courts of appeals  \n",
       "1            Three - point field goal  \n",
       "2                      Super Bowl LII  \n",
       "3  National debt of the United States  \n",
       "4                   Gun laws in Texas  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_docs_long.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "e207f178",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3042"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_docs_long)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00f31da5",
   "metadata": {},
   "source": [
    "### 3. MS MARCO Dataset Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f21623bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder containing the MS MARCO files (corpus, queries, qrels).\n",
    "# NOTE(review): absolute, user-specific path. The trailing '/' also makes get_all_files\n",
    "# return paths with '//' because it joins folder and entry with '/'.\n",
    "doc_folder_msmarco = '/Users/abhilashamangal/Documents/Semantic Search/data/msmarco/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8e8f502c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "qrels\n",
      ".DS_Store\n",
      "corpus.jsonl\n",
      "queries.jsonl\n"
     ]
    }
   ],
   "source": [
    "# get_all_files changes the working directory to the folder, prints every entry and\n",
    "# returns one '<folder>/<entry>' path per entry, in os.listdir() order.\n",
    "# The result also contains non-data entries (the qrels directory, .DS_Store).\n",
    "files_msmarco = get_all_files(doc_folder_msmarco)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7d634fa2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the MS MARCO corpus (JSON Lines, one record per line).\n",
    "# Address the file by name: os.listdir() returns entries in arbitrary order and the folder\n",
    "# also holds '.DS_Store' and 'qrels', so a positional index into the listing is not stable.\n",
    "df_corpus = pd.read_json(os.path.join(doc_folder_msmarco, 'corpus.jsonl'), lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "46e4bbc1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8841823"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_corpus)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "cba915fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show up to 255 characters per column so long passages stay readable in the tables below.\n",
    "pd.options.display.max_colwidth = 255"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "765c51fd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>title</th>\n",
       "      <th>text</th>\n",
       "      <th>metadata</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their...</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td></td>\n",
       "      <td>The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science.</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td></td>\n",
       "      <td>Essay on The Manhattan Project - The Manhattan Project The Manhattan Project was to see if making an atomic bomb possible. The success of this project would forever change the world forever making it known that something this powerful can be manmade.</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td></td>\n",
       "      <td>The Manhattan Project was the name for a project conducted during World War II, to develop the first atomic bomb. It refers specifically to the period of the project from 194 â¦ 2-1946 under the control of the U.S. Army Corps of Engineers, under the ...</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td></td>\n",
       "      <td>versions of each volume as well as complementary websites. The first websiteâThe Manhattan Project: An Interactive Historyâis available on the Office of History and Heritage Resources website, http://www.cfo. doe.gov/me70/history. The Office of Hi...</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _id title  \\\n",
       "0    0         \n",
       "1    1         \n",
       "2    2         \n",
       "3    3         \n",
       "4    4         \n",
       "\n",
       "                                                                                                                                                                                                                                                             text  \\\n",
       "0  The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their...   \n",
       "1                                                                                 The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science.   \n",
       "2      Essay on The Manhattan Project - The Manhattan Project The Manhattan Project was to see if making an atomic bomb possible. The success of this project would forever change the world forever making it known that something this powerful can be manmade.   \n",
       "3  The Manhattan Project was the name for a project conducted during World War II, to develop the first atomic bomb. It refers specifically to the period of the project from 194 â¦ 2-1946 under the control of the U.S. Army Corps of Engineers, under the ...   \n",
       "4  versions of each volume as well as complementary websites. The first websiteâThe Manhattan Project: An Interactive Historyâis available on the Office of History and Heritage Resources website, http://www.cfo. doe.gov/me70/history. The Office of Hi...   \n",
       "\n",
       "  metadata  \n",
       "0       {}  \n",
       "1       {}  \n",
       "2       {}  \n",
       "3       {}  \n",
       "4       {}  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_corpus.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89ba51fe",
   "metadata": {},
   "source": [
    "#### Queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "be762c09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the MS MARCO queries (JSON Lines, one record per line).\n",
    "# Address the file by name instead of by its position in the os.listdir() result,\n",
    "# which is arbitrary and shifts when entries such as '.DS_Store' appear or disappear.\n",
    "df_queries = pd.read_json(os.path.join(doc_folder_msmarco, 'queries.jsonl'), lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a7ac6bcd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>text</th>\n",
       "      <th>metadata</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1185869</td>\n",
       "      <td>)what was the immediate impact of the success of the manhattan project?</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1185868</td>\n",
       "      <td>_________ justice is designed to repair the harm to victim, the community and the offender caused by the offender criminal act. question 19 options:</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>597651</td>\n",
       "      <td>what color is amber urine</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>403613</td>\n",
       "      <td>is autoimmune hepatitis a bile acid synthesis disorder</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1183785</td>\n",
       "      <td>elegxo meaning</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       _id  \\\n",
       "0  1185869   \n",
       "1  1185868   \n",
       "2   597651   \n",
       "3   403613   \n",
       "4  1183785   \n",
       "\n",
       "                                                                                                                                                   text  \\\n",
       "0                                                                               )what was the immediate impact of the success of the manhattan project?   \n",
       "1  _________ justice is designed to repair the harm to victim, the community and the offender caused by the offender criminal act. question 19 options:   \n",
       "2                                                                                                                             what color is amber urine   \n",
       "3                                                                                                is autoimmune hepatitis a bile acid synthesis disorder   \n",
       "4                                                                                                                                        elegxo meaning   \n",
       "\n",
       "  metadata  \n",
       "0       {}  \n",
       "1       {}  \n",
       "2       {}  \n",
       "3       {}  \n",
       "4       {}  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_queries.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "1934a425",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder with the MS MARCO qrels splits. Derived from doc_folder_msmarco so the data root\n",
    "# is written down only once; the resulting string is the same as the previous literal.\n",
    "doc_folder_msmarco_t = os.path.join(doc_folder_msmarco, 'qrels')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "45190158",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.tsv\n",
      "dev.tsv\n",
      "test.tsv\n"
     ]
    }
   ],
   "source": [
    "# List the qrels split files. get_all_files also changes the working directory to the\n",
    "# folder and prints each entry; the returned order is whatever os.listdir() yields.\n",
    "files_msmarco_t = get_all_files(doc_folder_msmarco_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "455868c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the training qrels (tab-separated). Despite the df_queries_* name this frame holds\n",
    "# query-id / corpus-id / score rows, not query text.\n",
    "# Selected by file name because os.listdir() order is arbitrary.\n",
    "df_queries_t = pd.read_csv(os.path.join(doc_folder_msmarco_t, 'train.tsv'), sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "f0b16699",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query-id</th>\n",
       "      <th>corpus-id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1185869</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1185868</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>597651</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>403613</td>\n",
       "      <td>60</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1183785</td>\n",
       "      <td>389</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   query-id  corpus-id  score\n",
       "0   1185869          0      1\n",
       "1   1185868         16      1\n",
       "2    597651         49      1\n",
       "3    403613         60      1\n",
       "4   1183785        389      1"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_queries_t.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f51e5122",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_queries_t['score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7298c376",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "532751"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_queries_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "bafc4470",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dev qrels (tab-separated query-id / corpus-id / score rows).\n",
    "# Selected by file name because os.listdir() order is arbitrary.\n",
    "df_queries_d = pd.read_csv(os.path.join(doc_folder_msmarco_t, 'dev.tsv'), sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "9cfb2432",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query-id</th>\n",
       "      <th>corpus-id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>300674</td>\n",
       "      <td>7067032</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>125705</td>\n",
       "      <td>7067056</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>94798</td>\n",
       "      <td>7067181</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9083</td>\n",
       "      <td>7067274</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>174249</td>\n",
       "      <td>7067348</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   query-id  corpus-id  score\n",
       "0    300674    7067032      1\n",
       "1    125705    7067056      1\n",
       "2     94798    7067181      1\n",
       "3      9083    7067274      1\n",
       "4    174249    7067348      1"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_queries_d.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "3d2ca876",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7437"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_queries_d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "2f0a15d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test qrels (tab-separated query-id / corpus-id / score rows).\n",
    "# Selected by file name because os.listdir() order is arbitrary.\n",
    "df_queries_tt = pd.read_csv(os.path.join(doc_folder_msmarco_t, 'test.tsv'), sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "e46e27f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query-id</th>\n",
       "      <th>corpus-id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19335</td>\n",
       "      <td>1017759</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19335</td>\n",
       "      <td>1082489</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>19335</td>\n",
       "      <td>109063</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>19335</td>\n",
       "      <td>1160863</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>19335</td>\n",
       "      <td>1160871</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   query-id  corpus-id  score\n",
       "0     19335    1017759      0\n",
       "1     19335    1082489      0\n",
       "2     19335     109063      0\n",
       "3     19335    1160863      0\n",
       "4     19335    1160871      0"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_queries_tt.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "1a2e9503",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9260"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_queries_tt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff8ea1a5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9dc3133",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58610063",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "a56d0fbf",
   "metadata": {},
   "source": [
    "### 4. TREC-COVID Dataset Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "cf34f2cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder containing the TREC-COVID files (corpus, queries, qrels).\n",
    "# NOTE(review): absolute, user-specific path. The trailing '/' also makes get_all_files\n",
    "# return paths with '//' because it joins folder and entry with '/'.\n",
    "doc_folder_trec = '/Users/abhilashamangal/Documents/Semantic Search/data/trec-covid/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "575647e8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "qrels\n",
      "corpus.jsonl\n",
      "queries.jsonl\n"
     ]
    }
   ],
   "source": [
    "# get_all_files changes the working directory to the folder, prints every entry and\n",
    "# returns one '<folder>/<entry>' path per entry, in os.listdir() order.\n",
    "# The result also contains the qrels entry, which is not a data file to load directly.\n",
    "files_trec = get_all_files(doc_folder_trec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a8ba8b03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the TREC-COVID corpus (JSON Lines, one record per line).\n",
    "# Address the file by name: os.listdir() returns entries in arbitrary order, so a\n",
    "# positional index into the listing is not guaranteed to point at corpus.jsonl.\n",
    "df_corpus_trec = pd.read_json(os.path.join(doc_folder_trec, 'corpus.jsonl'), lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "8ba914c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>title</th>\n",
       "      <th>text</th>\n",
       "      <th>metadata</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ug7v899j</td>\n",
       "      <td>Clinical features of culture-proven Mycoplasma pneumoniae infections at King Abdulaziz University Hospital, Jeddah, Saudi Arabia</td>\n",
       "      <td>OBJECTIVE: This retrospective chart review describes the epidemiology and clinical features of 40 patients with culture-proven Mycoplasma pneumoniae infections at King Abdulaziz University Hospital, Jeddah, Saudi Arabia. METHODS: Patients with positiv...</td>\n",
       "      <td>{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC35282/', 'pubmed_id': '11472636'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>02tnwd4m</td>\n",
       "      <td>Nitric oxide: a pro-inflammatory mediator in lung disease?</td>\n",
       "      <td>Inflammatory diseases of the respiratory tract are commonly associated with elevated production of nitric oxide (NO•) and increased indices of NO• -dependent oxidative stress. Although NO• is known to have anti-microbial, anti-inflammatory and anti-ox...</td>\n",
       "      <td>{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59543/', 'pubmed_id': '11667967'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ejv2xln0</td>\n",
       "      <td>Surfactant protein-D and pulmonary host defense</td>\n",
       "      <td>Surfactant protein-D (SP-D) participates in the innate response to inhaled microorganisms and organic antigens, and contributes to immune and inflammatory regulation within the lung. SP-D is synthesized and secreted by alveolar and bronchiolar epithel...</td>\n",
       "      <td>{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59549/', 'pubmed_id': '11667972'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2b73a28n</td>\n",
       "      <td>Role of endothelin-1 in lung disease</td>\n",
       "      <td>Endothelin-1 (ET-1) is a 21 amino acid peptide with diverse biological activity that has been implicated in numerous diseases. ET-1 is a potent mitogen regulator of smooth muscle tone, and inflammatory mediator that may play a key role in diseases of ...</td>\n",
       "      <td>{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59574/', 'pubmed_id': '11686871'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9785vg6d</td>\n",
       "      <td>Gene expression in epithelial cells in response to pneumovirus infection</td>\n",
       "      <td>Respiratory syncytial virus (RSV) and pneumonia virus of mice (PVM) are viruses of the family Paramyxoviridae, subfamily pneumovirus, which cause clinically important respiratory infections in humans and rodents, respectively. The respiratory epitheli...</td>\n",
       "      <td>{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59580/', 'pubmed_id': '11686888'}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        _id  \\\n",
       "0  ug7v899j   \n",
       "1  02tnwd4m   \n",
       "2  ejv2xln0   \n",
       "3  2b73a28n   \n",
       "4  9785vg6d   \n",
       "\n",
       "                                                                                                                              title  \\\n",
       "0  Clinical features of culture-proven Mycoplasma pneumoniae infections at King Abdulaziz University Hospital, Jeddah, Saudi Arabia   \n",
       "1                                                                        Nitric oxide: a pro-inflammatory mediator in lung disease?   \n",
       "2                                                                                   Surfactant protein-D and pulmonary host defense   \n",
       "3                                                                                              Role of endothelin-1 in lung disease   \n",
       "4                                                          Gene expression in epithelial cells in response to pneumovirus infection   \n",
       "\n",
       "                                                                                                                                                                                                                                                             text  \\\n",
       "0  OBJECTIVE: This retrospective chart review describes the epidemiology and clinical features of 40 patients with culture-proven Mycoplasma pneumoniae infections at King Abdulaziz University Hospital, Jeddah, Saudi Arabia. METHODS: Patients with positiv...   \n",
       "1  Inflammatory diseases of the respiratory tract are commonly associated with elevated production of nitric oxide (NO•) and increased indices of NO• -dependent oxidative stress. Although NO• is known to have anti-microbial, anti-inflammatory and anti-ox...   \n",
       "2  Surfactant protein-D (SP-D) participates in the innate response to inhaled microorganisms and organic antigens, and contributes to immune and inflammatory regulation within the lung. SP-D is synthesized and secreted by alveolar and bronchiolar epithel...   \n",
       "3  Endothelin-1 (ET-1) is a 21 amino acid peptide with diverse biological activity that has been implicated in numerous diseases. ET-1 is a potent mitogen regulator of smooth muscle tone, and inflammatory mediator that may play a key role in diseases of ...   \n",
       "4  Respiratory syncytial virus (RSV) and pneumonia virus of mice (PVM) are viruses of the family Paramyxoviridae, subfamily pneumovirus, which cause clinically important respiratory infections in humans and rodents, respectively. The respiratory epitheli...   \n",
       "\n",
       "                                                                                  metadata  \n",
       "0  {'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC35282/', 'pubmed_id': '11472636'}  \n",
       "1  {'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59543/', 'pubmed_id': '11667967'}  \n",
       "2  {'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59549/', 'pubmed_id': '11667972'}  \n",
       "3  {'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59574/', 'pubmed_id': '11686871'}  \n",
       "4  {'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59580/', 'pubmed_id': '11686888'}  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the TREC corpus frame.\n",
    "df_corpus_trec.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "0d85e776",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.listdir() returns entries in arbitrary order, so pick the queries file\n",
    "# by name rather than by its position in files_trec.\n",
    "# NOTE(review): assumes the folder uses the `queries.jsonl` file name - confirm.\n",
    "queries_path_trec = next(p for p in files_trec if p.endswith('/queries.jsonl'))\n",
    "df_queries_trec = pd.read_json(queries_path_trec, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae9db26b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_queries_trec.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "9d88e470",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that holds the trec-covid qrels files.\n",
    "doc_folder_trec_q = (\n",
    "    '/Users/abhilashamangal/Documents/Semantic Search/data/trec-covid'\n",
    "    '/qrels'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "067e8795",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test.tsv\n"
     ]
    }
   ],
   "source": [
    "# List (and print) every file found in the trec-covid qrels folder.\n",
    "files_trec_q = get_all_files(folder_name=doc_folder_trec_q)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "f76df818",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The qrels folder contains a single tab-separated file (test.tsv).\n",
    "qrels_path_trec = files_trec_q[0]\n",
    "df_queries_trec_t = pd.read_table(qrels_path_trec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "d283d642",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66336"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the trec-covid qrels frame.\n",
    "df_queries_trec_t.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "876f2622",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query-id</th>\n",
       "      <th>corpus-id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>005b2j4b</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>00fmeepz</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>g7dhmyyo</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0194oljo</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>021q9884</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>02f0opkr</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>047xpt2c</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>04ftw7k9</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>pl9ht0d0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>05vx82oo</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>0604jed8</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1</td>\n",
       "      <td>06o2tbon</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1</td>\n",
       "      <td>06ya15z8</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1</td>\n",
       "      <td>084o1dmp</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1</td>\n",
       "      <td>08ds967z</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1</td>\n",
       "      <td>08efpohc</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1</td>\n",
       "      <td>0b4o0ccp</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1</td>\n",
       "      <td>brqby02y</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1</td>\n",
       "      <td>0chuwvg6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1</td>\n",
       "      <td>0cvoeiy0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1</td>\n",
       "      <td>0e1w86tg</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>qotm49rv</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1</td>\n",
       "      <td>0hnh4n9e</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1</td>\n",
       "      <td>0iq9s94n</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1</td>\n",
       "      <td>0khg28ex</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1</td>\n",
       "      <td>0l33i6s4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1</td>\n",
       "      <td>0lyxvex0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1</td>\n",
       "      <td>0m5mc320</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1</td>\n",
       "      <td>ex7rta8f</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1</td>\n",
       "      <td>0nh58odf</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    query-id corpus-id  score\n",
       "0          1  005b2j4b      2\n",
       "1          1  00fmeepz      1\n",
       "2          1  g7dhmyyo      2\n",
       "3          1  0194oljo      1\n",
       "4          1  021q9884      1\n",
       "5          1  02f0opkr      1\n",
       "6          1  047xpt2c      0\n",
       "7          1  04ftw7k9      0\n",
       "8          1  pl9ht0d0      0\n",
       "9          1  05vx82oo      0\n",
       "10         1  0604jed8      0\n",
       "11         1  06o2tbon      0\n",
       "12         1  06ya15z8      0\n",
       "13         1  084o1dmp      0\n",
       "14         1  08ds967z      1\n",
       "15         1  08efpohc      0\n",
       "16         1  0b4o0ccp      0\n",
       "17         1  brqby02y      2\n",
       "18         1  0chuwvg6      2\n",
       "19         1  0cvoeiy0      0\n",
       "20         1  0e1w86tg      1\n",
       "21         1  qotm49rv      1\n",
       "22         1  0hnh4n9e      1\n",
       "23         1  0iq9s94n      1\n",
       "24         1  0khg28ex      0\n",
       "25         1  0l33i6s4      0\n",
       "26         1  0lyxvex0      0\n",
       "27         1  0m5mc320      0\n",
       "28         1  ex7rta8f      2\n",
       "29         1  0nh58odf      2"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first 30 qrels rows (query-id, corpus-id, score).\n",
    "df_queries_trec_t.iloc[:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "d0a3208b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2,  1,  0, -1])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values that appear in the score column.\n",
    "trec_scores = df_queries_trec_t.loc[:, 'score']\n",
    "trec_scores.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "9c6ad867",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
       "       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,\n",
       "       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct query ids referenced by the qrels rows.\n",
    "trec_query_ids = df_queries_trec_t.loc[:, 'query-id']\n",
    "trec_query_ids.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "b55c1e44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root folder of the hotpotqa data set.\n",
    "doc_folder_hotpotqa = (\n",
    "    '/Users/abhilashamangal/Documents/Semantic Search/data'\n",
    "    '/hotpotqa'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "240723c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "qrels\n",
      "corpus.jsonl\n",
      "queries.jsonl\n"
     ]
    }
   ],
   "source": [
    "# List (and print) every entry found in the hotpotqa folder.\n",
    "files_hotpotqa = get_all_files(folder_name=doc_folder_hotpotqa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "264b43ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.listdir() order is arbitrary; select corpus.jsonl by name instead of\n",
    "# trusting that it sits at index 1 of files_hotpotqa.\n",
    "corpus_path_hotpotqa = next(p for p in files_hotpotqa if p.endswith('/corpus.jsonl'))\n",
    "df_corpus_hotpotqa = pd.read_json(corpus_path_hotpotqa, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "ba062351",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>title</th>\n",
       "      <th>text</th>\n",
       "      <th>metadata</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "      <td>Anarchism</td>\n",
       "      <td>Anarchism is a political philosophy that advocates self-governed societies based on voluntary institutions. These are often described as stateless societies, although several authors have defined them more specifically as institutions based on non-hie...</td>\n",
       "      <td>{'url': 'https://en.wikipedia.org/wiki?curid=12'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>25</td>\n",
       "      <td>Autism</td>\n",
       "      <td>Autism is a neurodevelopmental disorder characterized by impaired social interaction, impaired verbal and non-verbal communication, and restricted and repetitive behavior. Parents usually notice signs in the first two years of their child's life. Thes...</td>\n",
       "      <td>{'url': 'https://en.wikipedia.org/wiki?curid=25'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>39</td>\n",
       "      <td>Albedo</td>\n",
       "      <td>Albedo ( ) is a measure for reflectance or optical brightness (Latin \"albedo,\" \"whiteness\") of a surface. It is dimensionless and measured on a scale from zero (corresponding to a black body that absorbs all incident radiation) to one (corresponding t...</td>\n",
       "      <td>{'url': 'https://en.wikipedia.org/wiki?curid=39'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>290</td>\n",
       "      <td>A</td>\n",
       "      <td>A (named , plural \"As\", \"A's\", \"a\"s, \"a's\" or \"aes\" ) is the first letter and the first vowel of the ISO basic Latin alphabet. It is similar to the Ancient Greek letter alpha, from which it derives. The upper-case version consists of the two slanting ...</td>\n",
       "      <td>{'url': 'https://en.wikipedia.org/wiki?curid=290'}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>303</td>\n",
       "      <td>Alabama</td>\n",
       "      <td>Alabama ( ) is a state in the southeastern region of the United States. It is bordered by Tennessee to the north, Georgia to the east, Florida and the Gulf of Mexico to the south, and Mississippi to the west. Alabama is the 30th largest by area and th...</td>\n",
       "      <td>{'url': 'https://en.wikipedia.org/wiki?curid=303'}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _id      title  \\\n",
       "0   12  Anarchism   \n",
       "1   25     Autism   \n",
       "2   39     Albedo   \n",
       "3  290          A   \n",
       "4  303    Alabama   \n",
       "\n",
       "                                                                                                                                                                                                                                                             text  \\\n",
       "0  Anarchism is a political philosophy that advocates self-governed societies based on voluntary institutions. These are often described as stateless societies, although several authors have defined them more specifically as institutions based on non-hie...   \n",
       "1  Autism is a neurodevelopmental disorder characterized by impaired social interaction, impaired verbal and non-verbal communication, and restricted and repetitive behavior. Parents usually notice signs in the first two years of their child's life. Thes...   \n",
       "2  Albedo ( ) is a measure for reflectance or optical brightness (Latin \"albedo,\" \"whiteness\") of a surface. It is dimensionless and measured on a scale from zero (corresponding to a black body that absorbs all incident radiation) to one (corresponding t...   \n",
       "3  A (named , plural \"As\", \"A's\", \"a\"s, \"a's\" or \"aes\" ) is the first letter and the first vowel of the ISO basic Latin alphabet. It is similar to the Ancient Greek letter alpha, from which it derives. The upper-case version consists of the two slanting ...   \n",
       "4  Alabama ( ) is a state in the southeastern region of the United States. It is bordered by Tennessee to the north, Georgia to the east, Florida and the Gulf of Mexico to the south, and Mississippi to the west. Alabama is the 30th largest by area and th...   \n",
       "\n",
       "                                             metadata  \n",
       "0   {'url': 'https://en.wikipedia.org/wiki?curid=12'}  \n",
       "1   {'url': 'https://en.wikipedia.org/wiki?curid=25'}  \n",
       "2   {'url': 'https://en.wikipedia.org/wiki?curid=39'}  \n",
       "3  {'url': 'https://en.wikipedia.org/wiki?curid=290'}  \n",
       "4  {'url': 'https://en.wikipedia.org/wiki?curid=303'}  "
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the hotpotqa corpus frame.\n",
    "df_corpus_hotpotqa.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "d0cce95f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5233329"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the hotpotqa corpus frame.\n",
    "df_corpus_hotpotqa.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "a374b215",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.listdir() order is arbitrary; select queries.jsonl by name instead of\n",
    "# trusting that it sits at index 2 of files_hotpotqa.\n",
    "queries_path_hotpotqa = next(p for p in files_hotpotqa if p.endswith('/queries.jsonl'))\n",
    "df_queries_hotpotqa = pd.read_json(queries_path_hotpotqa, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "6e0c5809",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>text</th>\n",
       "      <th>metadata</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5ab6d31155429954757d3384</td>\n",
       "      <td>What country of origin does House of Cosbys and Bill Cosby have in common?</td>\n",
       "      <td>{'answer': 'American', 'supporting_facts': [['House of Cosbys', 0], ['Bill Cosby', 0]]}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5ac0d92f554299012d1db645</td>\n",
       "      <td>How many fountains where present \"World of Color\" which includes lights,fire,laser and fog which has anthology television series and created by Steve Davison, who is Lead Creative Executive of Parades for Walt Disney and author such a shows as \"Believ...</td>\n",
       "      <td>{'answer': '1,200 musical water fountains', 'supporting_facts': [['Steve Davison', 0], ['Steve Davison', 1], ['World of Color', 1], ['World of Color', 2]]}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5abd01335542993a06baf9fc</td>\n",
       "      <td>Chris Larceny directed the music video Gon Jock, featuring a Haitian rapper who first achieved fame as a member of what New Jersey hip hop group?</td>\n",
       "      <td>{'answer': 'the Fugees', 'supporting_facts': [['Chris Larceny', 3], ['Wyclef Jean', 0], ['Wyclef Jean', 2]]}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5abff8c95542994516f4555c</td>\n",
       "      <td>The person where local tradition says Cross Lake is the boyhood home of co-founded what group?</td>\n",
       "      <td>{'answer': 'the Iroquois Confederacy', 'supporting_facts': [['Cross Lake', 1], ['Hiawatha', 0]]}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5adec8ad55429975fa854f8f</td>\n",
       "      <td>The actor who played Carl Sweetchuck in the \"Police Academy\" films wrote the screenplay for \"About Last Night\" along with who else?</td>\n",
       "      <td>{'answer': 'Denise DeClue', 'supporting_facts': [['About Last Night (1986 film)', 1], ['Tim Kazurinsky', 0]]}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        _id  \\\n",
       "0  5ab6d31155429954757d3384   \n",
       "1  5ac0d92f554299012d1db645   \n",
       "2  5abd01335542993a06baf9fc   \n",
       "3  5abff8c95542994516f4555c   \n",
       "4  5adec8ad55429975fa854f8f   \n",
       "\n",
       "                                                                                                                                                                                                                                                             text  \\\n",
       "0                                                                                                                                                                                      What country of origin does House of Cosbys and Bill Cosby have in common?   \n",
       "1  How many fountains where present \"World of Color\" which includes lights,fire,laser and fog which has anthology television series and created by Steve Davison, who is Lead Creative Executive of Parades for Walt Disney and author such a shows as \"Believ...   \n",
       "2                                                                                                               Chris Larceny directed the music video Gon Jock, featuring a Haitian rapper who first achieved fame as a member of what New Jersey hip hop group?   \n",
       "3                                                                                                                                                                  The person where local tradition says Cross Lake is the boyhood home of co-founded what group?   \n",
       "4                                                                                                                             The actor who played Carl Sweetchuck in the \"Police Academy\" films wrote the screenplay for \"About Last Night\" along with who else?   \n",
       "\n",
       "                                                                                                                                                      metadata  \n",
       "0                                                                      {'answer': 'American', 'supporting_facts': [['House of Cosbys', 0], ['Bill Cosby', 0]]}  \n",
       "1  {'answer': '1,200 musical water fountains', 'supporting_facts': [['Steve Davison', 0], ['Steve Davison', 1], ['World of Color', 1], ['World of Color', 2]]}  \n",
       "2                                                 {'answer': 'the Fugees', 'supporting_facts': [['Chris Larceny', 3], ['Wyclef Jean', 0], ['Wyclef Jean', 2]]}  \n",
       "3                                                             {'answer': 'the Iroquois Confederacy', 'supporting_facts': [['Cross Lake', 1], ['Hiawatha', 0]]}  \n",
       "4                                                {'answer': 'Denise DeClue', 'supporting_facts': [['About Last Night (1986 film)', 1], ['Tim Kazurinsky', 0]]}  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the hotpotqa queries frame.\n",
    "df_queries_hotpotqa.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "d2b7d563",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "97852"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the hotpotqa queries frame.\n",
    "df_queries_hotpotqa.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "f21936db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the qrels folder from the hotpotqa root instead of repeating the\n",
    "# absolute path; the resulting string is the same as the previous literal.\n",
    "doc_folder_hotpotqa_q = f'{doc_folder_hotpotqa}/qrels'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd856327",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `len()` without an argument raises TypeError and stops Run All.\n",
    "# NOTE(review): the intended argument is unknown; counting the entries in\n",
    "# the qrels folder is a guess based on the surrounding cells - confirm.\n",
    "len(os.listdir(doc_folder_hotpotqa_q))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "2e0ef21b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.tsv\n",
      "dev.tsv\n",
      "test.tsv\n"
     ]
    }
   ],
   "source": [
    "# List (and print) every file found in the hotpotqa qrels folder.\n",
    "files_hotpotqa_q = get_all_files(folder_name=doc_folder_hotpotqa_q)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "930cd2ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.listdir() order is arbitrary and this folder holds three splits\n",
    "# (train/dev/test), so choose the split by name. train.tsv is the file that\n",
    "# was at index 0 when the folder was listed.\n",
    "qrels_train_path = next(p for p in files_hotpotqa_q if p.endswith('/train.tsv'))\n",
    "df_hotpotqa_q_t = pd.read_csv(qrels_train_path, sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "c9cf6d28",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query-id</th>\n",
       "      <th>corpus-id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5ab6d31155429954757d3384</td>\n",
       "      <td>2921047</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5ab6d31155429954757d3384</td>\n",
       "      <td>158894</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5ac0d92f554299012d1db645</td>\n",
       "      <td>35694141</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5ac0d92f554299012d1db645</td>\n",
       "      <td>12775381</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5abd01335542993a06baf9fc</td>\n",
       "      <td>35216810</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   query-id  corpus-id  score\n",
       "0  5ab6d31155429954757d3384    2921047      1\n",
       "1  5ab6d31155429954757d3384     158894      1\n",
       "2  5ac0d92f554299012d1db645   35694141      1\n",
       "3  5ac0d92f554299012d1db645   12775381      1\n",
       "4  5abd01335542993a06baf9fc   35216810      1"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the hotpotqa qrels frame.\n",
    "df_hotpotqa_q_t.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "046467b3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1])"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values that appear in the score column.\n",
    "hotpotqa_scores = df_hotpotqa_q_t.loc[:, 'score']\n",
    "hotpotqa_scores.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfb197b7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
